evijit HF Staff Claude Opus 4.7 (1M context) committed on
Commit
beb4e3a
·
1 Parent(s): 718288a

view-data: wrap JSON-typed parquet columns in to_json() to dodge binding crash

Browse files

The deployed Space surfaced "Invalid Error: don't know what type:"
on every eval-summary / model-summary query because the upstream
parquet schema landed JSON-typed fields nested inside structs —
specifically:
- eval_results_view.evalcards_annotations
(.variant_divergence.differing_fields[].values JSON,
.cross_party_divergence.differing_fields[].values JSON)
- evals_view.benchmark_card (.flagged_fields JSON)

The DuckDB Node binding can't materialise nested JSON, so SELECT-ing
those columns row-by-row crashes the whole query. Wrap the two
offending columns with to_json() in every SELECT (CELL_JOIN_COLUMNS,
EVAL_LIST_COLUMNS, getBenchmarkMetadataMap) so the binding sees a
single VARCHAR per row, and add a parseMaybeJson helper that
JSON.parses the string back to its original shape downstream. Local
prod build with DATA_BACKEND=v2 now serves /api/model-summary,
/api/eval-summary, and /api/eval-list at 200.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. lib/view-data.ts +50 -8
lib/view-data.ts CHANGED
@@ -68,7 +68,7 @@ const EVAL_LIST_COLUMNS = `
68
  metric_config, models_count, evaluator_names, source_types,
69
  latest_source_name, third_party_ratio,
70
  missing_generation_config_count, best_model, worst_model,
71
- avg_score, avg_score_norm, has_card, benchmark_card,
72
  is_aggregated, aggregate_sources, tags,
73
  metrics_count, metric_names, instance_data, top_score,
74
  subtasks_count, is_summary_score, summary_eval_ids,
@@ -77,8 +77,18 @@ const EVAL_LIST_COLUMNS = `
77
  source_data
78
  `
79
 
 
 
 
 
 
 
 
 
 
 
80
  const CELL_JOIN_COLUMNS = `
81
- r.*,
82
  e.evaluation_name AS eval_evaluation_name,
83
  e.canonical_display_name AS eval_canonical_display_name,
84
  e.benchmark_id AS eval_benchmark_id,
@@ -94,7 +104,7 @@ const CELL_JOIN_COLUMNS = `
94
  e.category AS eval_category,
95
  e.metric_config AS eval_metric_config,
96
  e.source_data AS eval_source_data,
97
- e.benchmark_card AS eval_benchmark_card,
98
  e.tags AS eval_tags,
99
  e.is_summary_score AS eval_is_summary_score,
100
  e.summary_eval_ids AS eval_summary_eval_ids
@@ -210,6 +220,23 @@ function optionalString(value: unknown) {
210
  return typeof value === "string" && value.length > 0 ? value : undefined
211
  }
212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  function asArray<T>(value: unknown): T[] {
214
  return Array.isArray(value) ? value as T[] : []
215
  }
@@ -309,7 +336,12 @@ function modelInfoFromModelRow(row: Row): ModelInfo {
309
  function resultFromCell(row: Row): EvaluationResult {
310
  const scoreDetails = scoreDetailsFromRow(row)
311
  const generationConfig = row.generation_config as GenerationConfig | undefined
312
- const annotations = row.evalcards_annotations
 
 
 
 
 
313
 
314
  return {
315
  evaluation_name: asString(row.metric_display_name ?? row.eval_evaluation_name ?? row.metric_id, "Score"),
@@ -471,7 +503,7 @@ export async function getEvalListData(): Promise<{
471
  totalModels: number
472
  }> {
473
  const [evals, countRows] = await Promise.all([
474
- readRows<BenchmarkEvalListItem>(
475
  `SELECT ${EVAL_LIST_COLUMNS}
476
  FROM evals_view
477
  ORDER BY evaluation_name ASC`
@@ -479,8 +511,15 @@ export async function getEvalListData(): Promise<{
479
  readRows<{ n: number }>("SELECT COUNT(*) AS n FROM models_view"),
480
  ])
481
 
 
 
 
 
 
 
 
482
  return {
483
- evals,
484
  totalModels: asNumber(countRows[0]?.n),
485
  }
486
  }
@@ -600,6 +639,9 @@ export async function getEvalSummaryById(evalId: string): Promise<BenchmarkEvalS
600
 
601
  const summary = {
602
  ...evalRow,
 
 
 
603
  model_results: cellRows.map(reshapeCellToModelResult),
604
  } as BenchmarkEvalSummary
605
 
@@ -723,14 +765,14 @@ export async function getBenchmarkMetadataMap(): Promise<Record<string, Benchmar
723
  `SELECT evaluation_id, evaluation_name,
724
  family_id AS composite_benchmark_key,
725
  benchmark_id,
726
- benchmark_card
727
  FROM evals_view
728
  WHERE benchmark_card IS NOT NULL`
729
  )
730
  const result: Record<string, BenchmarkCard> = {}
731
 
732
  for (const row of rows) {
733
- const card = row.benchmark_card as BenchmarkCard | null | undefined
734
  if (!card) continue
735
 
736
  const keys = [
 
68
  metric_config, models_count, evaluator_names, source_types,
69
  latest_source_name, third_party_ratio,
70
  missing_generation_config_count, best_model, worst_model,
71
+ avg_score, avg_score_norm, has_card, to_json(benchmark_card) AS benchmark_card,
72
  is_aggregated, aggregate_sources, tags,
73
  metrics_count, metric_names, instance_data, top_score,
74
  subtasks_count, is_summary_score, summary_eval_ids,
 
77
  source_data
78
  `
79
 
80
+ // The deployed Space started returning 500s ("Invalid Error: don't
81
+ // know what type:") on every eval-results / model-summary query after
82
+ // a parquet snapshot bump that landed nested `JSON` fields inside
83
+ // structs (e.g. `evalcards_annotations.variant_divergence.differing_
84
+ // fields[].values JSON` and `benchmark_card.flagged_fields JSON`).
85
+ // The DuckDB Node binding crashes when it tries to materialise those
86
+ // nested-JSON structs row-by-row. Wrap the offending columns with
87
+ // `to_json(...)` so the binding sees a single VARCHAR per row; the
88
+ // reshape helpers JSON.parse the string back into the same shape
89
+ // downstream code expects.
90
  const CELL_JOIN_COLUMNS = `
91
+ r.* REPLACE (to_json(r.evalcards_annotations) AS evalcards_annotations),
92
  e.evaluation_name AS eval_evaluation_name,
93
  e.canonical_display_name AS eval_canonical_display_name,
94
  e.benchmark_id AS eval_benchmark_id,
 
104
  e.category AS eval_category,
105
  e.metric_config AS eval_metric_config,
106
  e.source_data AS eval_source_data,
107
+ to_json(e.benchmark_card) AS eval_benchmark_card,
108
  e.tags AS eval_tags,
109
  e.is_summary_score AS eval_is_summary_score,
110
  e.summary_eval_ids AS eval_summary_eval_ids
 
220
  return typeof value === "string" && value.length > 0 ? value : undefined
221
  }
222
 
223
+ // Some parquet columns ship JSON-typed fields nested inside structs
224
+ // that the DuckDB Node binding can't materialise (crashes the entire
225
+ // query with "don't know what type:"). For those columns the SELECT
226
+ // wraps the value in `to_json(...)` so the binding sees a single
227
+ // VARCHAR; this helper undoes the wrap. If the value is already an
228
+ // object (legacy snapshots without the to_json wrap, or local dev
229
+ // where the binding handled the type), pass it through unchanged.
230
+ function parseMaybeJson(value: unknown): unknown {
231
+ if (typeof value !== "string") return value
232
+ if (value === "" || value === "null") return null
233
+ try {
234
+ return JSON.parse(value)
235
+ } catch {
236
+ return value
237
+ }
238
+ }
239
+
240
  function asArray<T>(value: unknown): T[] {
241
  return Array.isArray(value) ? value as T[] : []
242
  }
 
336
  function resultFromCell(row: Row): EvaluationResult {
337
  const scoreDetails = scoreDetailsFromRow(row)
338
  const generationConfig = row.generation_config as GenerationConfig | undefined
339
+ // evalcards_annotations arrives JSON-encoded — the parquet schema
340
+ // nests a JSON-typed field which the DuckDB Node binding can't
341
+ // materialise directly, so view-data's SELECT wraps the whole
342
+ // column in to_json() and we parse it back on the JS side. See
343
+ // CELL_JOIN_COLUMNS for the wrapping site.
344
+ const annotations = parseMaybeJson(row.evalcards_annotations)
345
 
346
  return {
347
  evaluation_name: asString(row.metric_display_name ?? row.eval_evaluation_name ?? row.metric_id, "Score"),
 
503
  totalModels: number
504
  }> {
505
  const [evals, countRows] = await Promise.all([
506
+ readRows<BenchmarkEvalListItem & { benchmark_card?: unknown }>(
507
  `SELECT ${EVAL_LIST_COLUMNS}
508
  FROM evals_view
509
  ORDER BY evaluation_name ASC`
 
511
  readRows<{ n: number }>("SELECT COUNT(*) AS n FROM models_view"),
512
  ])
513
 
514
+ // benchmark_card is JSON-encoded at the SQL layer; parse before
515
+ // handing it to consumers that expect the object shape.
516
+ const decoded = evals.map((row) => ({
517
+ ...row,
518
+ benchmark_card: parseMaybeJson(row.benchmark_card),
519
+ })) as BenchmarkEvalListItem[]
520
+
521
  return {
522
+ evals: decoded,
523
  totalModels: asNumber(countRows[0]?.n),
524
  }
525
  }
 
639
 
640
  const summary = {
641
  ...evalRow,
642
+ // benchmark_card arrives JSON-encoded (the parquet schema nests a
643
+ // JSON-typed field — see CELL_JOIN_COLUMNS / EVAL_LIST_COLUMNS).
644
+ benchmark_card: parseMaybeJson(evalRow.benchmark_card),
645
  model_results: cellRows.map(reshapeCellToModelResult),
646
  } as BenchmarkEvalSummary
647
 
 
765
  `SELECT evaluation_id, evaluation_name,
766
  family_id AS composite_benchmark_key,
767
  benchmark_id,
768
+ to_json(benchmark_card) AS benchmark_card
769
  FROM evals_view
770
  WHERE benchmark_card IS NOT NULL`
771
  )
772
  const result: Record<string, BenchmarkCard> = {}
773
 
774
  for (const row of rows) {
775
+ const card = parseMaybeJson(row.benchmark_card) as BenchmarkCard | null | undefined
776
  if (!card) continue
777
 
778
  const keys = [