#!/usr/bin/env node // Emit a single-file HTML viewer for the cleaned hierarchy.json. // // Usage: // node scripts/render-hierarchy-html.mjs [path/to/hierarchy.json] [out.html] // // Defaults: // in = .cache/hf-data/warehouse/latest/hierarchy.json // out = output/hierarchy_explorer.html // // The viewer mirrors the family > composite > benchmark > slice > metric // shape produced by build_hierarchy_v2.py (and the warehouse pipeline), // after running through `lib/clean-hierarchy.ts`'s consolidation pass so // what you see matches what the model-detail page renders. import fs from "node:fs" import path from "node:path" import { pathToFileURL } from "node:url" import { register } from "node:module" const ROOT = path.resolve(path.dirname(new URL(import.meta.url).pathname), "..") // We need cleanHierarchy from a TS source; spawn `tsx` if available, else // fall back to dynamic import via the registered loader. Simpler: shell out // to `tsx` once and read the result back through stdout. To avoid that // dependency, re-implement the minimum in JS — but cleanHierarchy is // non-trivial. Easiest path: run via tsx if present, else fall back to // untouched hierarchy with a banner. async function loadCleanHierarchy() { try { // Use tsx-loader. `node --import tsx` is the common pattern, but here we // just dynamically import the .ts file under tsx's hooks if invoked via // `tsx` (the user can also run via plain `node` and we'll skip cleaning). const url = pathToFileURL(path.join(ROOT, "lib/clean-hierarchy.ts")).href const mod = await import(url) return mod.cleanHierarchy } catch { return null } } const inPath = process.argv[2] ?? path.join(ROOT, ".cache/hf-data/warehouse/latest/hierarchy.json") const outPath = process.argv[3] ?? path.join(ROOT, "output/hierarchy_explorer.html") if (!fs.existsSync(inPath)) { console.error(`hierarchy.json not found at ${inPath}`) process.exit(1) } const raw = JSON.parse(fs.readFileSync(inPath, "utf8")) // Try to load comparison-index.json from the same snapshot dir so the // cleaner can do score-equality-based aggregator dedup (llm-stats vs // canonical sources). let comparisonIndex = null const comparisonIndexPath = path.join(path.dirname(inPath), "comparison-index.json") if (fs.existsSync(comparisonIndexPath)) { try { comparisonIndex = JSON.parse(fs.readFileSync(comparisonIndexPath, "utf8")) } catch (err) { console.error(`comparison-index.json unreadable: ${err.message ?? err}`) } } let cleaned = raw let cleanerStatus = "skipped — run via tsx to apply lib/clean-hierarchy.ts" const cleanFn = await loadCleanHierarchy() if (cleanFn) { try { cleaned = cleanFn(structuredClone(raw), comparisonIndex) cleanerStatus = comparisonIndex ? "applied lib/clean-hierarchy.ts (with comparison-index)" : "applied lib/clean-hierarchy.ts (without comparison-index — score dedup skipped)" } catch (err) { cleanerStatus = `cleaner threw: ${err.message ?? err}` } } if (process.env.HIERARCHY_DEBUG) { const focus = ["big-bench","big-bench-hard","mmlu-pro","mmlu-pro-leaderboard","apex-v1","apex-agents","math-mc","mt-bench","gsm-mc"] for (const fam of cleaned.families ?? []) { if (!focus.includes(fam.key)) continue console.error("---", fam.display_name, "(" + fam.key + ")", "---") for (const c of fam.composites||[]) console.error(" composite", c.key, c.benchmarks?.map(b=>b.key)) for (const b of fam.standalone_benchmarks||[]) console.error(" standalone", b.key, "splits:", (b.slices||[]).length) for (const b of fam.benchmarks||[]) console.error(" direct", b.key, "splits:", (b.slices||[]).length) } } const families = cleaned.families ?? [] const benchmarkIndex = cleaned.benchmark_index ?? [] const stats = cleaned.stats ?? {} function flattenBenchmarks(family) { const out = [] for (const b of family.benchmarks ?? []) out.push({ ...b, _scope: "family" }) for (const b of family.standalone_benchmarks ?? []) out.push({ ...b, _scope: "standalone" }) for (const c of family.composites ?? []) { for (const b of c.benchmarks ?? []) out.push({ ...b, _scope: "composite", _compositeKey: c.key, _compositeName: c.display_name }) } return out } const totalBenchmarks = families.reduce((sum, f) => sum + flattenBenchmarks(f).length, 0) const overlapsCount = benchmarkIndex.length // Pre-compute per-family overlap appearances so the family panel can show // "appears in X of this family's benchmarks" without scanning the entire // index per render. const overlapsByFamily = new Map() for (const entry of benchmarkIndex) { for (const app of entry.appearances ?? []) { const list = overlapsByFamily.get(app.family_key) ?? [] list.push({ canonicalKey: entry.key, canonicalDisplayName: entry.display_name, ...app }) overlapsByFamily.set(app.family_key, list) } } const payload = { cleanerStatus, generatedAt: cleaned.generated_at ?? null, schemaVersion: cleaned.schema_version ?? null, stats: { families: families.length, benchmarks: totalBenchmarks, overlaps: overlapsCount, ...stats, }, families, benchmarkIndex, overlapsByFamily: Array.from(overlapsByFamily.entries()), } const html = `