Spaces:

evaleval
/

general-eval-card

Running

App Files Files Community

Swap backend data

by j-chim - opened 26 days ago

base: refs/heads/main

←

from: refs/pr/3

Discussion Files changed

+4303

-1242429

This view is limited to 50 files because it contains too many changes. See the raw diff here.

Files changed (50) hide show

.gitignore +11 -0
Dockerfile +17 -17
app/developers/[id]/page.tsx +0 -46
app/evals/[id]/page.tsx +32 -124
app/evals/page.tsx +263 -9
app/globals.css +291 -30
app/models/page.tsx +35 -103
app/page.tsx +1 -1
components/benchmark-detail.tsx +3 -3
components/eval-detail.tsx +125 -411
components/family-table.tsx +234 -100
components/param-range-picker.tsx +244 -0
components/signals/benchmark-signals-strip.tsx +626 -0
components/signals/corpus-dashboard.tsx +61 -42
components/signals/corpus-signals-strip.tsx +21 -11
data/benchmarks.json +0 -90
data/developers.json +0 -3150
data/developers/0-hero.json +0 -47
data/developers/01-ai.json +0 -417
data/developers/1-800-llms.json +0 -33
data/developers/1024m.json +0 -33
data/developers/152334h.json +0 -19
data/developers/1tuanpham.json +0 -33
data/developers/3rd-degree-burn.json +0 -61
data/developers/4season.json +0 -19
data/developers/aaditya.json +0 -19
data/developers/aalf.json +0 -61
data/developers/aashraf995.json +0 -61
data/developers/abacusai.json +0 -145
data/developers/abacusresearch.json +0 -19
data/developers/abhishek.json +0 -75
data/developers/abideen.json +0 -19
data/developers/adamo1139.json +0 -19
data/developers/adriszmar.json +0 -19
data/developers/aellm.json +0 -33
data/developers/aevalone.json +0 -19
data/developers/agentlans.json +0 -131
data/developers/agi-0.json +0 -47
data/developers/ahdoot.json +0 -33
data/developers/ahjeong.json +0 -33
data/developers/ahmeda335.json +0 -19
data/developers/ai-mo.json +0 -33
data/developers/ai-sweden-models.json +0 -33
data/developers/ai2.json +0 -89
data/developers/ai21.json +0 -364
data/developers/ai21labs.json +0 -19
data/developers/ai4bharat.json +0 -19
data/developers/ai4free.json +0 -33
data/developers/aicoressecurity.json +0 -61
data/developers/aidc-ai.json +0 -19

.gitignore CHANGED Viewed

@@ -31,3 +31,14 @@ next-env.d.ts
 .cache/
 pipeline_revised.py
 .local-data/

 .cache/
 pipeline_revised.py
 .local-data/
+# Local data dumps. The runtime reads from .cache/hf-data/duckdb parquets;
+# anything under /data/ is leftover from offline scripts and should not
+# ship in git. The survey schema (loaded statically by app/survey) is the
+# one exception.
+/data/*
+!/data/survey/
+/data/survey/*
+!/data/survey/eval-schema-fields.json
+mock_design/
+shoot.mjs

Dockerfile CHANGED Viewed

@@ -9,20 +9,18 @@ ARG PNPM_VERSION=10.25.0
 # Build-time data-source configuration. HF Spaces "Variables" are NOT injected
 # into Docker RUN steps automatically — only into the final runtime — so we
-# bake the DuckDB-mode defaults here. `cache-hf-data.mjs` reads these to know
-# which dataset to clone and to apply lean cache mode (skip JSON-fallback
-# artifacts). Override at build time via `--build-arg HF_DATASET_REPO=...`.
-ARG DATA_BACKEND=duckdb
 ARG HF_DATASET_REPO=https://huggingface.co/datasets/evaleval/card_backend
-# Static prerender (`next build`) executes route handlers, which call
-# `getModelCards` etc. → `lib/duckdb-data.ts`, which requires
-# `LOCAL_PIPELINE_OUTPUT`. The cache populated by `cache-hf-data.mjs`
-# lives at `/app/.cache/hf-data`. `HF_DATA_OFFLINE=1` keeps the metadata
-# fetchers (`lib/hf-data.ts`) from attempting `evaleval/card_backend`
-# network reads with `revalidate: 0` (which Next 15 treats as dynamic
-# and fails the static export of `/`).
 ENV DATA_BACKEND=${DATA_BACKEND} \
     HF_DATASET_REPO=${HF_DATASET_REPO} \
     LOCAL_PIPELINE_OUTPUT=/app/.cache/hf-data \
     HF_DATA_LOCAL_DIR=/app/.cache/hf-data \
     HF_DATA_OFFLINE=1
@@ -49,13 +47,15 @@ RUN pnpm run build
 FROM node:18-bullseye-slim AS runner
 WORKDIR /app
-# Runtime needs the same DuckDB-mode envs that the builder used. HF Space
-# Variables aren't set on this Space, and Docker multi-stage doesn't carry
-# ENVs across stages — without these, lib/duckdb-data.ts throws
-# "DATA_BACKEND=duckdb requires LOCAL_PIPELINE_OUTPUT" at request time and
-# every model/eval/developer endpoint returns empty.
 ENV NODE_ENV=production \
-    DATA_BACKEND=duckdb \
     LOCAL_PIPELINE_OUTPUT=/app/.cache/hf-data \
     HF_DATA_LOCAL_DIR=/app/.cache/hf-data \
     HF_DATA_OFFLINE=1

 # Build-time data-source configuration. HF Spaces "Variables" are NOT injected
 # into Docker RUN steps automatically — only into the final runtime — so we
+# bake the selected backend here. `DATA_BACKEND=v2` reads `SNAPSHOT_URL`
+# directly; legacy DuckDB mode still clones `HF_DATASET_REPO` into the cache.
+# Override at build time via `--build-arg ...`.
+ARG DATA_BACKEND=v2
 ARG HF_DATASET_REPO=https://huggingface.co/datasets/evaleval/card_backend
+ARG SNAPSHOT_URL=https://huggingface.co/datasets/j-chim/temp_evalcard_backend/resolve/main/warehouse/2026-05-03T21-46-50Z
+# Static prerender (`next build`) executes route handlers. In legacy mode the
+# cache populated by `cache-hf-data.mjs` lives at `/app/.cache/hf-data`; in v2
+# the cache step is skipped and the app reads the pinned Stage J snapshot.
 ENV DATA_BACKEND=${DATA_BACKEND} \
     HF_DATASET_REPO=${HF_DATASET_REPO} \
+    SNAPSHOT_URL=${SNAPSHOT_URL} \
     LOCAL_PIPELINE_OUTPUT=/app/.cache/hf-data \
     HF_DATA_LOCAL_DIR=/app/.cache/hf-data \
     HF_DATA_OFFLINE=1
 FROM node:18-bullseye-slim AS runner
 WORKDIR /app
+ARG DATA_BACKEND=v2
+ARG SNAPSHOT_URL=https://huggingface.co/datasets/j-chim/temp_evalcard_backend/resolve/main/warehouse/2026-05-03T21-46-50Z
+# Runtime needs the same data-source envs that the builder used. Docker
+# multi-stage doesn't carry ENVs across stages, so keep backend selection and
+# snapshot/cache pointers explicit here too.
 ENV NODE_ENV=production \
+    DATA_BACKEND=${DATA_BACKEND} \
+    SNAPSHOT_URL=${SNAPSHOT_URL} \
     LOCAL_PIPELINE_OUTPUT=/app/.cache/hf-data \
     HF_DATA_LOCAL_DIR=/app/.cache/hf-data \
     HF_DATA_OFFLINE=1

app/developers/[id]/page.tsx CHANGED Viewed

@@ -9,7 +9,6 @@ import { InfiniteScrollSentinel } from "@/components/infinite-scroll"
 import { ModelTable } from "@/components/model-table"
 import { Navigation } from "@/components/navigation"
 import type { BenchmarkCard } from "@/lib/benchmark-schema"
-import { lookupBenchmarkCard } from "@/lib/benchmark-metadata-utils"
 import { fetchDeveloperSummary, fetchBenchmarkMetadata } from "@/lib/dashboard-data-client"
 const PAGE_SIZE = 40
@@ -59,24 +58,6 @@ export default function DeveloperDetailPage() {
     [models]
   )
-  // Collect unique domains from benchmarks this developer's models are evaluated on
-  const domainCoverage = useMemo(() => {
-    const domainMap = new Map<string, Set<string>>()
-    for (const model of models) {
-      for (const { benchmark } of model.top_scores) {
-        const card = lookupBenchmarkCard(benchmarkCards, benchmark)
-        for (const domain of card?.benchmark_details?.domains ?? []) {
-          const existing = domainMap.get(domain) ?? new Set()
-          existing.add(benchmark)
-          domainMap.set(domain, existing)
-        }
-      }
-    }
-    return Array.from(domainMap.entries())
-      .map(([domain, benchmarks]) => ({ domain, count: benchmarks.size }))
-      .sort((a, b) => b.count - a.count)
-  }, [models, benchmarkCards])
   const filteredModels = useMemo(() => {
     const query = searchQuery.trim().toLowerCase()
     const filtered = query
@@ -207,12 +188,6 @@ export default function DeveloperDetailPage() {
               · <span className="text-[color:var(--fg)] tabular-nums font-semibold mr-1">{totalResults.toLocaleString()}</span>
               results
             </span>
-            {domainCoverage.length > 0 && (
-              <span>
-                · <span className="text-[color:var(--fg)] tabular-nums font-semibold mr-1">{domainCoverage.length}</span>
-                domains
-              </span>
-            )}
           </div>
           <span className="hidden h-5 w-px bg-[color:var(--border-soft)] sm:block" />
@@ -239,27 +214,6 @@ export default function DeveloperDetailPage() {
           </select>
         </div>
-        {/* DOMAIN COVERAGE — hairline tag row ---------------------- */}
-        {domainCoverage.length > 0 && (
-          <div className="mb-8">
-            <div className="kicker mb-3">Benchmark domain coverage</div>
-            <div className="flex flex-wrap gap-1.5">
-              {domainCoverage.map(({ domain, count }) => (
-                <span
-                  key={domain}
-                  className="ec-tag outline"
-                  style={{ textTransform: "none", letterSpacing: "normal", fontFamily: "var(--font-sans)" }}
-                >
-                  <span className="text-[12px] font-medium text-[color:var(--fg)] capitalize">{domain}</span>
-                  <span className="font-mono text-[10px] tabular-nums text-[color:var(--fg-muted)]">
-                    {count}
-                  </span>
-                </span>
-              ))}
-            </div>
-          </div>
-        )}
         {/* TABLE ---------------------------------------------------- */}
         {filteredModels.length === 0 ? (
           <div className="border border-dashed border-[color:var(--border-soft)] bg-[color:var(--bg-warm)] py-12 text-center font-mono text-[11px] uppercase tracking-[0.2em] text-[color:var(--fg-subtle)]">

 import { ModelTable } from "@/components/model-table"
 import { Navigation } from "@/components/navigation"
 import type { BenchmarkCard } from "@/lib/benchmark-schema"
 import { fetchDeveloperSummary, fetchBenchmarkMetadata } from "@/lib/dashboard-data-client"
 const PAGE_SIZE = 40
     [models]
   )
   const filteredModels = useMemo(() => {
     const query = searchQuery.trim().toLowerCase()
     const filtered = query
               · <span className="text-[color:var(--fg)] tabular-nums font-semibold mr-1">{totalResults.toLocaleString()}</span>
               results
             </span>
           </div>
           <span className="hidden h-5 w-px bg-[color:var(--border-soft)] sm:block" />
           </select>
         </div>
         {/* TABLE ---------------------------------------------------- */}
         {filteredModels.length === 0 ? (
           <div className="border border-dashed border-[color:var(--border-soft)] bg-[color:var(--bg-warm)] py-12 text-center font-mono text-[11px] uppercase tracking-[0.2em] text-[color:var(--fg-subtle)]">

app/evals/[id]/page.tsx CHANGED Viewed

@@ -6,27 +6,11 @@ import Link from "next/link"
 import { ArrowLeft, ArrowUpRight, BarChart3, Grid3X3, Search } from "lucide-react"
 import { Navigation } from "@/components/navigation"
 import { EvalDetail } from "@/components/eval-detail"
 import { useAudienceMode } from "@/components/audience-mode-provider"
 import type { BenchmarkEvalSummary } from "@/lib/eval-processing"
 import { fetchEvalSummary } from "@/lib/dashboard-data-client"
-const PARAM_RANGE_VALUES = [1, 2, 3, 4, 6, 8, 10, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 500] as const
-const PARAM_RANGE_MARKERS = [
-  { label: "< 1B", step: 0 },
-  { label: "6B", step: PARAM_RANGE_VALUES.indexOf(6) },
-  { label: "12B", step: PARAM_RANGE_VALUES.indexOf(12) },
-  { label: "32B", step: PARAM_RANGE_VALUES.indexOf(32) },
-  { label: "128B", step: PARAM_RANGE_VALUES.indexOf(128) },
-  { label: "> 500B", step: PARAM_RANGE_VALUES.length - 1 },
-] as const
-function formatParamBoundLabel(step: number, bound: "min" | "max") {
-  const maxStepIndex = PARAM_RANGE_VALUES.length - 1
-  if (bound === "min" && step <= 0) return "< 1B"
-  if (bound === "max" && step >= maxStepIndex) return "> 500B"
-  const value = PARAM_RANGE_VALUES[step]
-  return value != null ? `${value}B` : "Not reported"
-}
 export default function EvalDetailPage() {
   const params = useParams()
@@ -192,10 +176,7 @@ function CompositeEvalView({
     <div className="space-y-10">
       {/* HERO ------------------------------------------------------------- */}
       <header className="motion-academic-enter">
-        <div className="kicker kicker-accent mb-2">
-          {isPolicy ? "Benchmark suite" : "Composite · §3.2"}
-        </div>
-        <h1 className="ec-page-h1" style={{ marginTop: 4 }}>{summary.evaluation_name}</h1>
         <div
           className="mb-5 flex flex-wrap items-center gap-3 font-mono text-[11px] uppercase tracking-[0.12em]"
           style={{ color: "var(--fg-muted)" }}
@@ -434,7 +415,7 @@ function MatrixLeaderboard({
   const [page, setPage] = useState(1)
   const [hiddenCols, setHiddenCols] = useState<Set<string>>(new Set())
   const [minParamStep, setMinParamStep] = useState(0)
-  const [maxParamStep, setMaxParamStep] = useState(PARAM_RANGE_VALUES.length - 1)
   const PAGE_SIZE = 50
   const metricDirection = useMemo(() => {
@@ -470,9 +451,9 @@ function MatrixLeaderboard({
         const avg = validScores.length > 0
           ? validScores.reduce((a, b) => a + b, 0) / validScores.length
           : 0
-        let sizeB: number | null = null
-        const sizeMatch = (data.name + " " + id).match(/\b(\d+(?:\.\d+)?)\s*[bB]\b/)
-        if (sizeMatch) sizeB = parseFloat(sizeMatch[1])
         return { id, name: data.name, developer: data.developer, avg, scores: data.scores, sizeB }
       })
@@ -496,9 +477,11 @@ function MatrixLeaderboard({
     })
   }, [models, sortCol, sortAsc])
-  const maxStepIndex = PARAM_RANGE_VALUES.length - 1
-  const numericMinParams = minParamStep <= 0 ? null : (PARAM_RANGE_VALUES[minParamStep] ?? null)
-  const numericMaxParams = maxParamStep >= maxStepIndex ? null : (PARAM_RANGE_VALUES[maxParamStep] ?? null)
   const query = search.trim().toLowerCase()
   const filteredModels = sortedModels.filter((m) => {
@@ -507,8 +490,9 @@ function MatrixLeaderboard({
       m.developer.toLowerCase().includes(query) ||
       m.id.toLowerCase().includes(query)
     )) return false
-    if (numericMinParams != null && (m.sizeB == null || m.sizeB < numericMinParams)) return false
-    if (numericMaxParams != null && (m.sizeB == null || m.sizeB > numericMaxParams)) return false
     return true
   })
@@ -594,99 +578,23 @@ function MatrixLeaderboard({
           />
         </div>
-        {/* Param slider */}
-        <div
-          className="flex items-center gap-3 px-4 py-2"
-          style={{ border: "1px solid var(--border-soft)", background: "var(--bg-warm)" }}
-        >
-          <span
-            className="shrink-0 font-mono uppercase tracking-[0.14em]"
-            style={{ fontSize: 10, color: "var(--fg-subtle)" }}
-          >
-            Params
-          </span>
-          <div className="min-w-0 flex-1 w-[min(92vw,300px)]">
-            <div className="relative mb-1 h-4 text-[10px]" style={{ color: "var(--fg-subtle)" }}>
-              {PARAM_RANGE_MARKERS.map((marker) => (
-                <span
-                  key={marker.label}
-                  className="absolute top-0 whitespace-nowrap font-mono"
-                  style={{
-                    left: `${(marker.step / maxStepIndex) * 100}%`,
-                    transform:
-                      marker.step === 0 ? "translateX(0)"
-                        : marker.step === maxStepIndex ? "translateX(-100%)"
-                          : "translateX(-50%)",
-                  }}
-                >
-                  {marker.label}
-                </span>
-              ))}
-            </div>
-            <div className="relative h-4">
-              <div
-                className="absolute inset-x-1.5 top-1/2 h-[3px] -translate-y-1/2"
-                style={{ background: "var(--border-strong)" }}
-              />
-              <div className="absolute inset-x-1.5 top-1/2 h-[3px] -translate-y-1/2">
-                <div
-                  className="absolute inset-y-0"
-                  style={{
-                    background: "var(--fg)",
-                    left: `${(minParamStep / maxStepIndex) * 100}%`,
-                    right: `${Math.max(100 - (maxParamStep / maxStepIndex) * 100, 0)}%`,
-                  }}
-                />
-              </div>
-              <div className="absolute inset-x-1.5 top-1/2 -translate-y-1/2">
-                {PARAM_RANGE_VALUES.map((_, stepIndex) => (
-                  <span
-                    key={`param-tick-${stepIndex}`}
-                    className="absolute top-0 h-2 w-px -translate-x-1/2"
-                    style={{ left: `${(stepIndex / maxStepIndex) * 100}%`, background: "var(--border-soft)" }}
-                    aria-hidden="true"
-                  />
-                ))}
-              </div>
-              <input
-                type="range"
-                min={0}
-                max={maxStepIndex}
-                step={1}
-                value={minParamStep}
-                onChange={(e) => {
-                  const v = Number(e.target.value)
-                  setMinParamStep(Math.min(v, maxParamStep))
-                }}
-                className="param-range-input"
-                aria-label="Minimum parameter filter"
-              />
-              <input
-                type="range"
-                min={0}
-                max={maxStepIndex}
-                step={1}
-                value={maxParamStep}
-                onChange={(e) => {
-                  const v = Number(e.target.value)
-                  setMaxParamStep(Math.max(v, minParamStep))
-                }}
-                className="param-range-input"
-                aria-label="Maximum parameter filter"
-              />
-            </div>
-          </div>
-          <span
-            className="shrink-0 font-mono"
-            style={{ fontSize: 10, color: "var(--fg-muted)" }}
-          >
-            {formatParamBoundLabel(minParamStep, "min")} – {formatParamBoundLabel(maxParamStep, "max")}
-          </span>
-        </div>
         <div
           className="font-mono uppercase tracking-[0.14em] whitespace-nowrap ml-auto"

 import { ArrowLeft, ArrowUpRight, BarChart3, Grid3X3, Search } from "lucide-react"
 import { Navigation } from "@/components/navigation"
 import { EvalDetail } from "@/components/eval-detail"
+import { ParamRangePicker } from "@/components/param-range-picker"
 import { useAudienceMode } from "@/components/audience-mode-provider"
 import type { BenchmarkEvalSummary } from "@/lib/eval-processing"
 import { fetchEvalSummary } from "@/lib/dashboard-data-client"
+import { PARAM_RANGE_MAX_INDEX, parseParamsBillionsFromModelName, paramStepToNumeric } from "@/lib/param-range"
 export default function EvalDetailPage() {
   const params = useParams()
     <div className="space-y-10">
       {/* HERO ------------------------------------------------------------- */}
       <header className="motion-academic-enter">
+        <h1 className="ec-page-h1">{summary.evaluation_name}</h1>
         <div
           className="mb-5 flex flex-wrap items-center gap-3 font-mono text-[11px] uppercase tracking-[0.12em]"
           style={{ color: "var(--fg-muted)" }}
   const [page, setPage] = useState(1)
   const [hiddenCols, setHiddenCols] = useState<Set<string>>(new Set())
   const [minParamStep, setMinParamStep] = useState(0)
+  const [maxParamStep, setMaxParamStep] = useState(PARAM_RANGE_MAX_INDEX)
   const PAGE_SIZE = 50
   const metricDirection = useMemo(() => {
         const avg = validScores.length > 0
           ? validScores.reduce((a, b) => a + b, 0) / validScores.length
           : 0
+        const sizeB =
+          parseParamsBillionsFromModelName(data.name) ??
+          parseParamsBillionsFromModelName(id)
         return { id, name: data.name, developer: data.developer, avg, scores: data.scores, sizeB }
       })
     })
   }, [models, sortCol, sortAsc])
+  const numericMinParams = paramStepToNumeric(minParamStep, "min")
+  const numericMaxParams = paramStepToNumeric(maxParamStep, "max")
+  const [showUnknownSize, setShowUnknownSize] = useState(true)
+  const hasParameterData = useMemo(() => models.some((m) => m.sizeB != null), [models])
   const query = search.trim().toLowerCase()
   const filteredModels = sortedModels.filter((m) => {
       m.developer.toLowerCase().includes(query) ||
       m.id.toLowerCase().includes(query)
     )) return false
+    if (m.sizeB == null) return showUnknownSize
+    if (numericMinParams != null && m.sizeB < numericMinParams) return false
+    if (numericMaxParams != null && m.sizeB > numericMaxParams) return false
     return true
   })
           />
         </div>
+        {hasParameterData && (
+          <ParamRangePicker
+            variant="inline"
+            headline="Params"
+            minStep={minParamStep}
+            maxStep={maxParamStep}
+            onMinChange={setMinParamStep}
+            onMaxChange={setMaxParamStep}
+            onReset={() => {
+              setMinParamStep(0)
+              setMaxParamStep(PARAM_RANGE_MAX_INDEX)
+            }}
+            showUnknownSize={showUnknownSize}
+            onShowUnknownSizeChange={setShowUnknownSize}
+            className="min-w-[260px] flex-1 sm:max-w-[420px]"
+          />
+        )}
         <div
           className="font-mono uppercase tracking-[0.14em] whitespace-nowrap ml-auto"

app/evals/page.tsx CHANGED Viewed

@@ -1,13 +1,15 @@
 "use client"
 import { useCallback, useDeferredValue, useEffect, useMemo, useState } from "react"
-import { Search } from "lucide-react"
 import { FamilyTable } from "@/components/family-table"
 import { InfiniteScrollSentinel } from "@/components/infinite-scroll"
 import { Navigation } from "@/components/navigation"
 import type { EvalHierarchy, HierarchyFamily } from "@/lib/backend-artifacts"
-import { fetchEvalHierarchy, fetchEvalList } from "@/lib/dashboard-data-client"
 const PAGE_SIZE = 60
@@ -54,17 +56,26 @@ function familyBenchmarkCount(fam: HierarchyFamily): number {
 export default function EvalsPage() {
   const [hierarchy, setHierarchy] = useState<EvalHierarchy | null>(null)
   const [totalModels, setTotalModels] = useState<number>(0)
   const [loading, setLoading] = useState(true)
   const [searchQuery, setSearchQuery] = useState("")
   const [sortBy, setSortBy] = useState<FamilySort>("results")
   const [visibleCount, setVisibleCount] = useState(PAGE_SIZE)
   const deferredSearchQuery = useDeferredValue(searchQuery)
   useEffect(() => {
-    Promise.all([fetchEvalHierarchy(), fetchEvalList()])
-      .then(([h, list]) => {
         setHierarchy(h)
         setTotalModels(list.totalModels)
       })
       .catch(console.error)
       .finally(() => setLoading(false))
@@ -72,6 +83,86 @@ export default function EvalsPage() {
   const families = hierarchy?.families ?? []
   const filteredFamilies = useMemo(() => {
     const query = deferredSearchQuery.trim().toLowerCase()
     let list = families
@@ -85,6 +176,20 @@ export default function EvalsPage() {
       )
     }
     return list.slice().sort((a, b) => {
       switch (sortBy) {
         case "name":
@@ -98,11 +203,11 @@ export default function EvalsPage() {
           return familyEvalsCount(b) - familyEvalsCount(a)
       }
     })
-  }, [families, deferredSearchQuery, sortBy])
   useEffect(() => {
     setVisibleCount(PAGE_SIZE)
-  }, [deferredSearchQuery, sortBy])
   const visibleFamilies = useMemo(
     () => filteredFamilies.slice(0, visibleCount),
@@ -126,8 +231,7 @@ export default function EvalsPage() {
         <p className="ec-page-lede">
           Evaluations are grouped into <strong>families</strong>. A family holds one or more
           benchmarks; each benchmark has one or more slices; each slice reports one or more
-          metrics. Metrics are not commensurable across rows — compare within a cell, not
-          across cells.
         </p>
         {/* META ROW ------------------------------------------------- */}
@@ -180,6 +284,41 @@ export default function EvalsPage() {
           <div className="grow" />
           <select
             className="ec-select"
             value={sortBy}
@@ -192,6 +331,115 @@ export default function EvalsPage() {
           </select>
         </div>
         {/* TABLE ---------------------------------------------------- */}
         {loading ? (
           <div className="py-24 text-center font-mono text-[11px] uppercase tracking-[0.2em] text-[color:var(--fg-subtle)]">
@@ -214,7 +462,13 @@ export default function EvalsPage() {
             </button>
           </div>
         ) : (
-          <FamilyTable families={visibleFamilies} totalModels={totalModels} />
         )}
         <InfiniteScrollSentinel

 "use client"
 import { useCallback, useDeferredValue, useEffect, useMemo, useState } from "react"
+import { ChevronDown, ChevronUp, Search, Tag } from "lucide-react"
 import { FamilyTable } from "@/components/family-table"
 import { InfiniteScrollSentinel } from "@/components/infinite-scroll"
 import { Navigation } from "@/components/navigation"
 import type { EvalHierarchy, HierarchyFamily } from "@/lib/backend-artifacts"
+import { fetchBenchmarkMetadata, fetchEvalHierarchy, fetchEvalList } from "@/lib/dashboard-data-client"
+import type { BenchmarkEvalListItem } from "@/lib/eval-processing"
+import type { BenchmarkCard } from "@/lib/benchmark-schema"
 const PAGE_SIZE = 60
 export default function EvalsPage() {
   const [hierarchy, setHierarchy] = useState<EvalHierarchy | null>(null)
   const [totalModels, setTotalModels] = useState<number>(0)
+  const [evalItems, setEvalItems] = useState<Map<string, BenchmarkEvalListItem>>(new Map())
+  const [benchmarkCards, setBenchmarkCards] = useState<Record<string, BenchmarkCard>>({})
   const [loading, setLoading] = useState(true)
   const [searchQuery, setSearchQuery] = useState("")
   const [sortBy, setSortBy] = useState<FamilySort>("results")
   const [visibleCount, setVisibleCount] = useState(PAGE_SIZE)
+  const [domainPanelOpen, setDomainPanelOpen] = useState(false)
+  const [domainFilter, setDomainFilter] = useState<Set<string>>(new Set())
+  const [selectedCategories, setSelectedCategories] = useState<string[]>([])
   const deferredSearchQuery = useDeferredValue(searchQuery)
   useEffect(() => {
+    Promise.all([fetchEvalHierarchy(), fetchEvalList(), fetchBenchmarkMetadata()])
+      .then(([h, list, metadata]) => {
         setHierarchy(h)
         setTotalModels(list.totalModels)
+        const map = new Map<string, BenchmarkEvalListItem>()
+        for (const item of list.evals) map.set(item.evaluation_id, item)
+        setEvalItems(map)
+        setBenchmarkCards(metadata)
       })
       .catch(console.error)
       .finally(() => setLoading(false))
   const families = hierarchy?.families ?? []
+  // Build a family-key → domain-set map. The lite eval list doesn't carry
+  // benchmark cards, so we read domains from `benchmark-metadata.json`
+  // (keyed by benchmark / leaf / family key). For each family we union
+  // the domains found under the family key, each leaf's own `tags.domains`
+  // and leaf key, and every `eval_summary_ids` entry, normalized to
+  // trimmed lowercase. `domainCounts` below turns this into per-domain
+  // family counts.
+  const familyDomains = useMemo(() => {
+    const out = new Map<string, Set<string>>()
+    const lookupDomains = (key: string | null | undefined): string[] => {
+      if (!key) return []
+      const card = benchmarkCards[key]
+      const domains = card?.benchmark_details?.domains
+      return Array.isArray(domains) ? domains : []
+    }
+    for (const fam of families) {
+      const seen = new Set<string>()
+      for (const d of lookupDomains(fam.key)) seen.add(d.trim().toLowerCase())
+      for (const leaf of fam.leaves ?? []) {
+        for (const d of leaf.tags?.domains ?? []) seen.add(d.trim().toLowerCase())
+        for (const d of lookupDomains(leaf.key)) seen.add(d.trim().toLowerCase())
+      }
+      for (const id of fam.eval_summary_ids ?? []) {
+        for (const d of lookupDomains(id)) seen.add(d.trim().toLowerCase())
+      }
+      seen.delete("")
+      out.set(fam.key, seen)
+    }
+    return out
+  }, [families, benchmarkCards])
+  // Domain → display label (first spelling seen; benchmark cards are scanned
+  // before leaf tags) + count of families touching that domain. Sorted
+  // descending by count, with ties broken alphabetically by label.
+  const domainCounts = useMemo(() => {
+    const counts = new Map<string, number>()
+    const labels = new Map<string, string>()
+    const recordLabel = (raw: string) => {
+      const key = raw.trim().toLowerCase()
+      if (!key || labels.has(key)) return
+      labels.set(key, raw.trim())
+    }
+    for (const card of Object.values(benchmarkCards)) {
+      for (const d of card?.benchmark_details?.domains ?? []) recordLabel(d)
+    }
+    for (const fam of families) {
+      for (const leaf of fam.leaves ?? []) {
+        for (const d of leaf.tags?.domains ?? []) recordLabel(d)
+      }
+    }
+    for (const set of familyDomains.values()) {
+      for (const key of set) counts.set(key, (counts.get(key) ?? 0) + 1)
+    }
+    return Array.from(counts.entries())
+      .map(([key, count]) => ({ domain: labels.get(key) ?? key, count, key }))
+      .sort((a, b) => b.count - a.count || a.domain.localeCompare(b.domain))
+  }, [familyDomains, families, benchmarkCards])
+  const toggleDomain = useCallback((domain: string) => {
+    setDomainFilter((current) => {
+      const next = new Set(current)
+      if (next.has(domain)) next.delete(domain)
+      else next.add(domain)
+      return next
+    })
+  }, [])
+  const clearDomainFilter = useCallback(() => setDomainFilter(new Set()), [])
+  // Categories present on the family list — drives the pill selector
+  // below the toolbar. Sort them by descending family count so the most
+  // common ones surface first.
+  const availableCategories = useMemo(() => {
+    const counts = new Map<string, number>()
+    for (const fam of families) {
+      const cat = fam.category ?? "General"
+      counts.set(cat, (counts.get(cat) ?? 0) + 1)
+    }
+    return Array.from(counts.entries())
+      .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
+      .map(([category]) => category)
+  }, [families])
   const filteredFamilies = useMemo(() => {
     const query = deferredSearchQuery.trim().toLowerCase()
     let list = families
       )
     }
+    if (selectedCategories.length > 0) {
+      const set = new Set(selectedCategories)
+      list = list.filter((fam) => set.has(fam.category ?? "General"))
+    }
+    if (domainFilter.size > 0) {
+      list = list.filter((fam) => {
+        const set = familyDomains.get(fam.key)
+        if (!set) return false
+        for (const key of set) if (domainFilter.has(key)) return true
+        return false
+      })
+    }
     return list.slice().sort((a, b) => {
       switch (sortBy) {
         case "name":
           return familyEvalsCount(b) - familyEvalsCount(a)
       }
     })
+  }, [families, deferredSearchQuery, sortBy, domainFilter, selectedCategories, familyDomains])
   useEffect(() => {
     setVisibleCount(PAGE_SIZE)
+  }, [deferredSearchQuery, sortBy, domainFilter, selectedCategories])
   const visibleFamilies = useMemo(
     () => filteredFamilies.slice(0, visibleCount),
         <p className="ec-page-lede">
           Evaluations are grouped into <strong>families</strong>. A family holds one or more
           benchmarks; each benchmark has one or more slices; each slice reports one or more
+          metrics.
         </p>
         {/* META ROW ------------------------------------------------- */}
           <div className="grow" />
+          {domainCounts.length > 0 && (
+            <button
+              type="button"
+              onClick={() => setDomainPanelOpen((v) => !v)}
+              className="inline-flex items-center gap-2"
+              style={{
+                fontFamily: "var(--font-mono)",
+                fontSize: 10,
+                letterSpacing: "0.14em",
+                textTransform: "uppercase",
+                padding: "6px 12px",
+                border: "1px solid var(--border-strong)",
+                background:
+                  domainPanelOpen || domainFilter.size > 0 ? "var(--fg)" : "var(--bg)",
+                color:
+                  domainPanelOpen || domainFilter.size > 0 ? "var(--bg)" : "var(--fg)",
+                cursor: "pointer",
+              }}
+              aria-expanded={domainPanelOpen}
+            >
+              <Tag className="h-3 w-3" aria-hidden />
+              Filter by domain
+              {domainFilter.size > 0 && (
+                <span className="font-mono tabular-nums" style={{ marginLeft: 2 }}>
+                  · {domainFilter.size}
+                </span>
+              )}
+              {domainPanelOpen ? (
+                <ChevronUp className="h-3 w-3" aria-hidden />
+              ) : (
+                <ChevronDown className="h-3 w-3" aria-hidden />
+              )}
+            </button>
+          )}
           <select
             className="ec-select"
             value={sortBy}
           </select>
         </div>
+        {/* DOMAIN FILTER PANEL — collapsed by default; opens when the user
+            wants to slice the family list by topical domain. Selecting a
+            domain unfurls every aggregator family in the table below so
+            matching benchmarks are immediately visible. */}
+        {domainPanelOpen && domainCounts.length > 0 && (
+          <div
+            className="mb-6"
+            style={{
+              border: "1px solid var(--border-soft)",
+              background: "var(--bg-warm)",
+              padding: "12px 16px",
+            }}
+          >
+            <div className="mb-3 flex items-center justify-between gap-3">
+              <div
+                className="font-mono uppercase"
+                style={{ fontSize: 10, letterSpacing: "0.14em", color: "var(--fg-subtle)" }}
+              >
+                {domainFilter.size === 0
+                  ? `Pick one or more domains · ${domainCounts.length} available`
+                  : `${domainFilter.size} selected · ${domainCounts.length - domainFilter.size} more`}
+              </div>
+              {domainFilter.size > 0 && (
+                <button
+                  type="button"
+                  onClick={clearDomainFilter}
+                  className="font-mono uppercase"
+                  style={{
+                    fontSize: 10,
+                    letterSpacing: "0.12em",
+                    color: "var(--fg-subtle)",
+                    background: "transparent",
+                    border: 0,
+                    cursor: "pointer",
+                  }}
+                >
+                  Clear
+                </button>
+              )}
+            </div>
+            <div className="flex flex-wrap gap-1.5">
+              {domainCounts.map(({ domain, count }) => {
+                const key = domain.trim().toLowerCase()
+                const selected = domainFilter.has(key)
+                return (
+                  <button
+                    key={key}
+                    type="button"
+                    onClick={() => toggleDomain(key)}
+                    className="ec-tag outline inline-flex items-center gap-1.5"
+                    style={{
+                      cursor: "pointer",
+                      background: selected ? "var(--fg)" : "var(--bg)",
+                      color: selected ? "var(--bg)" : "var(--fg)",
+                      borderColor: selected ? "var(--fg)" : "var(--border-strong)",
+                      textTransform: "none",
+                      letterSpacing: "normal",
+                      fontFamily: "var(--font-sans)",
+                    }}
+                    aria-pressed={selected}
+                  >
+                    <span className="text-[12px] font-medium capitalize">{domain}</span>
+                    <span
+                      className="font-mono text-[10px] tabular-nums"
+                      style={{ color: selected ? "var(--bg)" : "var(--fg-muted)" }}
+                    >
+                      {count}
+                    </span>
+                  </button>
+                )
+              })}
+            </div>
+          </div>
+        )}
+        {/* CATEGORY PILLS — quick toggle filter by category. Mirrors the
+            same pattern used on benchmark-detail's matrix browser. */}
+        {availableCategories.length > 0 && (
+          <div className="mb-5 flex flex-wrap items-center gap-2">
+            <span className="kicker mr-2">Category</span>
+            <button
+              type="button"
+              onClick={() => setSelectedCategories([])}
+              className={`ec-pill ${selectedCategories.length === 0 ? "on" : ""}`}
+            >
+              All
+            </button>
+            {availableCategories.map((category) => {
+              const isSelected = selectedCategories.includes(category)
+              return (
+                <button
+                  key={category}
+                  type="button"
+                  onClick={() =>
+                    setSelectedCategories((current) =>
+                      current.includes(category)
+                        ? current.filter((item) => item !== category)
+                        : [...current, category],
+                    )
+                  }
+                  className={`ec-pill ${isSelected ? "on" : ""}`}
+                >
+                  {category}
+                </button>
+              )
+            })}
+          </div>
+        )}
         {/* TABLE ---------------------------------------------------- */}
         {loading ? (
           <div className="py-24 text-center font-mono text-[11px] uppercase tracking-[0.2em] text-[color:var(--fg-subtle)]">
             </button>
           </div>
         ) : (
+          <FamilyTable
+            families={visibleFamilies}
+            totalModels={totalModels}
+            evalItems={evalItems}
+            benchmarkCards={benchmarkCards}
+            domainFilter={domainFilter}
+          />
         )}
         <InfiniteScrollSentinel

app/globals.css CHANGED Viewed

@@ -1117,55 +1117,316 @@
    any width/transform transitions so the fill bar tracks the cursor 1:1
    instead of lagging 300ms behind every move (the prior cause of the
    "jumpy" feel). */
 .param-range-input {
   appearance: none;
   position: absolute;
-  left: 0.375rem;
-  right: 0.375rem;
-  top: 50%;
-  transform: translateY(-50%);
-  width: calc(100% - 0.75rem);
-  height: 18px;
   background: transparent;
   pointer-events: none;
   margin: 0;
   padding: 0;
 }
-.param-range-input::-webkit-slider-runnable-track { height: 18px; background: transparent; border: 0; }
 .param-range-input::-webkit-slider-thumb {
   appearance: none;
   pointer-events: auto;
-  height: 18px;
-  width: 18px;
-  border-radius: 9999px;
-  border: 2px solid var(--color-foreground);
-  background: var(--color-foreground);
   margin-top: 0;
-  cursor: grab;
-  box-shadow: 0 0 0 3px color-mix(in srgb, var(--color-background) 85%, transparent);
 }
-.param-range-input::-webkit-slider-thumb:active { cursor: grabbing; }
-.param-range-input::-moz-range-track { height: 18px; border: 0; background: transparent; }
 .param-range-input::-moz-range-thumb {
   pointer-events: auto;
-  height: 18px;
-  width: 18px;
-  border-radius: 9999px;
-  border: 2px solid var(--color-foreground);
-  background: var(--color-foreground);
-  cursor: grab;
-  box-shadow: 0 0 0 3px color-mix(in srgb, var(--color-background) 85%, transparent);
 }
-.param-range-input::-moz-range-thumb:active { cursor: grabbing; }
 .param-range-input:focus-visible::-webkit-slider-thumb {
-  box-shadow:
-    0 0 0 3px color-mix(in srgb, var(--color-background) 85%, transparent),
-    0 0 0 6px color-mix(in srgb, var(--color-ring) 35%, transparent);
 }
 .param-range-input:focus-visible::-moz-range-thumb {
-  box-shadow:
-    0 0 0 3px color-mix(in srgb, var(--color-background) 85%, transparent),
-    0 0 0 6px color-mix(in srgb, var(--color-ring) 35%, transparent);
 }
 @media (prefers-reduced-motion: reduce) {

    any width/transform transitions so the fill bar tracks the cursor 1:1
    instead of lagging 300ms behind every move (the prior cause of the
    "jumpy" feel). */
+/* ============================================================
+   Param-range picker (themed)
+   - Dual-handle dual-input slider, snap to fixed bucket ticks
+   - Hairline rail with labelled bucket marks above the rail
+   - Square outline thumbs (matches the editorial sharp-corner language)
+   - Mono numerals in the read-out, kicker label on the left
+   - Variants: default (A), inline (B), promo (C)
+   ============================================================ */
+.pr-slider {
+  display: grid;
+  grid-template-columns: 180px 1fr minmax(130px, auto);
+  gap: 24px;
+  align-items: center;
+}
+.pr-slider-track-only {
+  grid-template-columns: 1fr minmax(130px, auto);
+  gap: 18px;
+}
+.pr-slider.inline {
+  grid-template-columns: max-content 1fr max-content;
+  gap: 18px;
+}
+.pr-readout-cell {
+  display: inline-flex;
+  align-items: center;
+  gap: 10px;
+  white-space: nowrap;
+  justify-self: end;
+}
+.pr-label {
+  font-family: var(--font-mono);
+  font-size: 10px;
+  letter-spacing: 0.16em;
+  text-transform: uppercase;
+  color: var(--fg-subtle);
+  line-height: 1.45;
+}
+.pr-label strong {
+  display: block;
+  color: var(--fg);
+  font-weight: 600;
+  margin-bottom: 2px;
+}
+.pr-label.inline-label {
+  white-space: nowrap;
+}
+.pr-label.inline-label strong {
+  display: inline;
+  margin: 0;
+}
+.pr-track-wrap {
+  position: relative;
+  height: 56px;
+  user-select: none;
+  min-width: 0;
+}
+.pr-ticks {
+  position: absolute;
+  left: 0;
+  right: 0;
+  top: 0;
+  font-family: var(--font-mono);
+  font-size: 9.5px;
+  letter-spacing: 0.06em;
+  color: var(--fg-subtle);
+  font-variant-numeric: tabular-nums;
+  pointer-events: none;
+}
+.pr-tick {
+  position: absolute;
+  top: 0;
+  width: 1px;
+  text-align: center;
+}
+.pr-tick::after {
+  content: "";
+  position: absolute;
+  left: 50%;
+  top: 18px;
+  width: 1px;
+  height: 6px;
+  background: var(--border-strong);
+  transform: translateX(-50%);
+}
+.pr-tick.on::after {
+  background: var(--fg);
+}
+.pr-tick > span {
+  display: inline-block;
+  white-space: nowrap;
+}
+.pr-rail {
+  position: absolute;
+  left: 0;
+  right: 0;
+  top: 36px;
+  height: 2px;
+  background: var(--border-strong);
+}
+.pr-fill {
+  position: absolute;
+  top: 36px;
+  height: 2px;
+  background: var(--fg);
+  transition: left 0.08s linear, width 0.08s linear;
+}
+.pr-microticks {
+  position: absolute;
+  left: 0;
+  right: 0;
+  top: 36px;
+  height: 2px;
+  pointer-events: none;
+}
+.pr-microticks > span {
+  position: absolute;
+  top: -2px;
+  width: 1px;
+  height: 6px;
+  background: var(--border-soft);
+  transform: translateX(-50%);
+}
+.pr-readout {
+  font-family: var(--font-mono);
+  font-size: 11px;
+  color: var(--fg);
+  text-align: right;
+  padding: 6px 10px;
+  border: 1px solid var(--border);
+  border-bottom: 1.5px solid var(--fg);
+  background: var(--bg);
+  font-variant-numeric: tabular-nums;
+  letter-spacing: 0.04em;
+  white-space: nowrap;
+}
+.pr-readout .arrow {
+  color: var(--fg-subtle);
+  margin: 0 4px;
+}
+.pr-readout.inline {
+  border: 0;
+  padding: 0;
+  font-weight: 600;
+  text-align: left;
+  background: transparent;
+}
+.pr-reset {
+  font-family: var(--font-mono);
+  font-size: 10px;
+  letter-spacing: 0.12em;
+  text-transform: uppercase;
+  color: var(--fg-subtle);
+  background: transparent;
+  border: 0;
+  cursor: pointer;
+  padding: 2px 4px;
+  white-space: nowrap;
+  transition: color var(--transition);
+}
+.pr-reset:hover {
+  color: var(--accent);
+}
+/* Inline toggle: "Show models without a reported size". Sits next to the
+   readout, matches the editorial sharp-corner vocabulary. */
+.pr-unknown-toggle {
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+  font-family: var(--font-mono);
+  font-size: 10px;
+  letter-spacing: 0.12em;
+  text-transform: uppercase;
+  color: var(--fg-subtle);
+  background: transparent;
+  border: 0;
+  cursor: pointer;
+  padding: 2px 4px;
+  white-space: nowrap;
+  transition: color var(--transition);
+}
+.pr-unknown-toggle.on {
+  color: var(--fg);
+}
+.pr-unknown-toggle:hover {
+  color: var(--accent);
+}
+.pr-unknown-toggle-box {
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  width: 12px;
+  height: 12px;
+  border: 1.5px solid currentColor;
+  font-size: 10px;
+  line-height: 1;
+}
+.pr-unknown-toggle.on .pr-unknown-toggle-box {
+  background: var(--fg);
+  color: var(--bg);
+  border-color: var(--fg);
+}
+/* Variant C: warm-bg, left-accent, "narrow the matrix" framing */
+.pr-promo {
+  display: grid;
+  grid-template-columns: minmax(220px, 1fr) minmax(420px, 2fr) auto;
+  gap: 28px;
+  align-items: center;
+  padding: 16px 20px;
+  border: 1px solid var(--border);
+  background: var(--bg-warm);
+  border-left: 2px solid var(--accent);
+}
+.pr-promo .pr-promo-head {
+  line-height: 1.5;
+  min-width: 0;
+}
+.pr-promo .pr-promo-head .kicker {
+  display: block;
+  margin-bottom: 4px;
+}
+.pr-promo .pr-promo-head p {
+  margin: 0;
+  font-size: 13px;
+  color: var(--fg-muted);
+}
+@media (max-width: 900px) {
+  .pr-promo {
+    grid-template-columns: 1fr;
+  }
+  .pr-slider {
+    grid-template-columns: 1fr;
+    gap: 16px;
+  }
+  .pr-slider.inline {
+    grid-template-columns: max-content 1fr max-content;
+  }
+}
+/* Native range inputs sit above the visual rail to provide drag + a11y.
+   The runnable-track is kept transparent and only the thumb is styled.
+   The two inputs overlap perfectly (same min/max/step); the inputs ignore
+   pointer events and only the thumbs accept them, so the user can grab
+   either handle without moving the wrong one. */
 .param-range-input {
   appearance: none;
   position: absolute;
+  left: 0;
+  right: 0;
+  top: 31px;
+  width: 100%;
+  height: 14px;
   background: transparent;
   pointer-events: none;
   margin: 0;
   padding: 0;
 }
+.param-range-input::-webkit-slider-runnable-track {
+  height: 14px;
+  background: transparent;
+  border: 0;
+}
 .param-range-input::-webkit-slider-thumb {
   appearance: none;
   pointer-events: auto;
+  height: 12px;
+  width: 12px;
+  border: 2px solid var(--fg);
+  background: var(--bg);
+  border-radius: 0;
   margin-top: 0;
+  cursor: ew-resize;
+  transition: background var(--transition), border-color var(--transition);
+}
+.param-range-input::-webkit-slider-thumb:hover {
+  background: var(--fg);
+}
+.param-range-input::-moz-range-track {
+  height: 14px;
+  border: 0;
+  background: transparent;
 }
 .param-range-input::-moz-range-thumb {
   pointer-events: auto;
+  height: 12px;
+  width: 12px;
+  border: 2px solid var(--fg);
+  background: var(--bg);
+  border-radius: 0;
+  cursor: ew-resize;
+  transition: background var(--transition), border-color var(--transition);
+}
+.param-range-input::-moz-range-thumb:hover {
+  background: var(--fg);
+}
+.param-range-input:focus-visible {
+  outline: none;
 }
 .param-range-input:focus-visible::-webkit-slider-thumb {
+  background: var(--accent);
+  border-color: var(--accent);
 }
 .param-range-input:focus-visible::-moz-range-thumb {
+  background: var(--accent);
+  border-color: var(--accent);
 }
 @media (prefers-reduced-motion: reduce) {

app/models/page.tsx CHANGED Viewed

@@ -9,30 +9,14 @@ import { InfiniteScrollSentinel } from "@/components/infinite-scroll"
 import { ModelCompareDialog } from "@/components/model-compare-dialog"
 import { ModelTable } from "@/components/model-table"
 import { Navigation } from "@/components/navigation"
 import { fetchDevelopers, fetchModelCards, fetchBenchmarkMetadata, type DeveloperListItem } from "@/lib/dashboard-data-client"
 import type { BenchmarkCard } from "@/lib/benchmark-schema"
 const PAGE_SIZE = 40
 const MAX_COMPARE_MODELS = 4
-const PARAM_RANGE_VALUES = [1, 2, 3, 4, 6, 8, 10, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 500] as const
-const PARAM_RANGE_MARKERS = [
-  { label: "< 1B", step: 0 },
-  { label: "6B", step: PARAM_RANGE_VALUES.indexOf(6) },
-  { label: "12B", step: PARAM_RANGE_VALUES.indexOf(12) },
-  { label: "32B", step: PARAM_RANGE_VALUES.indexOf(32) },
-  { label: "128B", step: PARAM_RANGE_VALUES.indexOf(128) },
-  { label: "> 500B", step: PARAM_RANGE_VALUES.length - 1 },
-] as const
-function formatParamBoundLabel(step: number, bound: "min" | "max") {
-  const maxStepIndex = PARAM_RANGE_VALUES.length - 1
-  if (bound === "min" && step <= 0) return "< 1B"
-  if (bound === "max" && step >= maxStepIndex) return "> 500B"
-  const value = PARAM_RANGE_VALUES[step]
-  return value != null ? `${value}B` : "?"
-}
 type ModelSort = "benchmarks" | "results" | "name" | "released" | "params"
 type DevSort = "coverage" | "evaluated" | "models" | "name"
@@ -61,17 +45,11 @@ export default function ModelsPage() {
   const [compareOpen, setCompareOpen] = useState(false)
   const [visibleCount, setVisibleCount] = useState(PAGE_SIZE)
   const [minParamStep, setMinParamStep] = useState(0)
-  const [maxParamStep, setMaxParamStep] = useState(PARAM_RANGE_VALUES.length - 1)
   const deferredSearchQuery = useDeferredValue(searchQuery)
-  const maxParamStepIndex = PARAM_RANGE_VALUES.length - 1
-  const numericMinParams = useMemo(
-    () => (minParamStep <= 0 ? null : PARAM_RANGE_VALUES[minParamStep] ?? null),
-    [minParamStep]
-  )
-  const numericMaxParams = useMemo(
-    () => (maxParamStep >= PARAM_RANGE_VALUES.length - 1 ? null : PARAM_RANGE_VALUES[maxParamStep] ?? null),
-    [maxParamStep]
-  )
   useEffect(() => {
     Promise.all([fetchModelCards(), fetchBenchmarkMetadata()])
@@ -106,16 +84,12 @@ export default function ModelsPage() {
     const query = deferredSearchQuery.trim().toLowerCase()
     let filtered = evaluations
-    if (numericMinParams != null) {
-      filtered = filtered.filter(
-        (row) => row.params_billions != null && row.params_billions >= numericMinParams
-      )
-    }
-    if (numericMaxParams != null) {
-      filtered = filtered.filter(
-        (row) => row.params_billions != null && row.params_billions <= numericMaxParams
-      )
-    }
     if (query) {
       filtered = filtered.filter((row) => {
@@ -143,7 +117,7 @@ export default function ModelsPage() {
           return b.benchmarks_count - a.benchmarks_count
       }
     })
-  }, [evaluations, deferredSearchQuery, modelSortBy, numericMinParams, numericMaxParams])
   // Developers — filter + sort
   const sortedDevelopers = useMemo(() => {
@@ -176,7 +150,7 @@ export default function ModelsPage() {
   // Reset visible window when filter/sort changes
   useEffect(() => {
     setVisibleCount(PAGE_SIZE)
-  }, [groupByDeveloper, modelSortBy, developerSortBy, deferredSearchQuery, minParamStep, maxParamStep])
   const totalCount = groupByDeveloper ? sortedDevelopers.length : sortedEvaluations.length
   const visibleEvaluations = useMemo(
@@ -292,69 +266,6 @@ export default function ModelsPage() {
             />
           </div>
-          {!groupByDeveloper && (
-            <>
-              <span className="hidden h-5 w-px bg-[color:var(--border-soft)] sm:block" />
-              {/* Compact param picker — kicker + slider + readout, all inline */}
-              <div className="flex min-w-[260px] flex-1 items-center gap-4 sm:max-w-[300px]">
-                <span className="kicker shrink-0">Params</span>
-                <div className="relative h-4 min-w-0 flex-1">
-                  <div className="absolute inset-x-2 top-1/2 h-[2px] -translate-y-1/2 bg-[color:var(--border-soft)]" />
-                  <div className="absolute inset-x-2 top-1/2 h-[2px] -translate-y-1/2">
-                    <div
-                      className="absolute inset-y-0 bg-[color:var(--fg)] transition-[left,right] duration-200 ease-[var(--ease-out-quart)]"
-                      style={{
-                        left: `${(minParamStep / maxParamStepIndex) * 100}%`,
-                        right: `${Math.max(100 - (maxParamStep / maxParamStepIndex) * 100, 0)}%`,
-                      }}
-                    />
-                  </div>
-                  <input
-                    type="range"
-                    min={0}
-                    max={maxParamStepIndex}
-                    step={1}
-                    value={minParamStep}
-                    onChange={(event) =>
-                      setMinParamStep(Math.min(Number(event.target.value), maxParamStep))
-                    }
-                    className="param-range-input"
-                    aria-label="Minimum parameter filter"
-                  />
-                  <input
-                    type="range"
-                    min={0}
-                    max={maxParamStepIndex}
-                    step={1}
-                    value={maxParamStep}
-                    onChange={(event) =>
-                      setMaxParamStep(Math.max(Number(event.target.value), minParamStep))
-                    }
-                    className="param-range-input"
-                    aria-label="Maximum parameter filter"
-                  />
-                </div>
-                <span className="shrink-0 whitespace-nowrap font-mono text-[11px] tabular-nums text-[color:var(--fg-muted)]">
-                  {formatParamBoundLabel(minParamStep, "min")} – {formatParamBoundLabel(maxParamStep, "max")}
-                </span>
-                {(minParamStep > 0 || maxParamStep < maxParamStepIndex) && (
-                  <button
-                    type="button"
-                    onClick={() => {
-                      setMinParamStep(0)
-                      setMaxParamStep(maxParamStepIndex)
-                    }}
-                    className="shrink-0 font-mono text-[10px] uppercase tracking-[0.12em] text-[color:var(--fg-subtle)] hover:text-[color:var(--accent)] transition-colors"
-                    aria-label="Reset parameters filter"
-                  >
-                    Reset
-                  </button>
-                )}
-              </div>
-            </>
-          )}
           <select
             className="ec-select ml-auto shrink-0"
             value={groupByDeveloper ? developerSortBy : modelSortBy}
@@ -383,6 +294,27 @@ export default function ModelsPage() {
           </select>
         </div>
         {/* TABLE ---------------------------------------------------- */}
         {loading ? (
           <div className="py-24 text-center font-mono text-[11px] uppercase tracking-[0.2em] text-[color:var(--fg-subtle)]">

 import { ModelCompareDialog } from "@/components/model-compare-dialog"
 import { ModelTable } from "@/components/model-table"
 import { Navigation } from "@/components/navigation"
+import { ParamRangePicker } from "@/components/param-range-picker"
 import { fetchDevelopers, fetchModelCards, fetchBenchmarkMetadata, type DeveloperListItem } from "@/lib/dashboard-data-client"
 import type { BenchmarkCard } from "@/lib/benchmark-schema"
+import { PARAM_RANGE_MAX_INDEX, paramStepToNumeric } from "@/lib/param-range"
 const PAGE_SIZE = 40
 const MAX_COMPARE_MODELS = 4
 type ModelSort = "benchmarks" | "results" | "name" | "released" | "params"
 type DevSort = "coverage" | "evaluated" | "models" | "name"
   const [compareOpen, setCompareOpen] = useState(false)
   const [visibleCount, setVisibleCount] = useState(PAGE_SIZE)
   const [minParamStep, setMinParamStep] = useState(0)
+  const [maxParamStep, setMaxParamStep] = useState(PARAM_RANGE_MAX_INDEX)
+  const [showUnknownSize, setShowUnknownSize] = useState(true)
   const deferredSearchQuery = useDeferredValue(searchQuery)
+  const numericMinParams = useMemo(() => paramStepToNumeric(minParamStep, "min"), [minParamStep])
+  const numericMaxParams = useMemo(() => paramStepToNumeric(maxParamStep, "max"), [maxParamStep])
   useEffect(() => {
     Promise.all([fetchModelCards(), fetchBenchmarkMetadata()])
     const query = deferredSearchQuery.trim().toLowerCase()
     let filtered = evaluations
+    filtered = filtered.filter((row) => {
+      if (row.params_billions == null) return showUnknownSize
+      if (numericMinParams != null && row.params_billions < numericMinParams) return false
+      if (numericMaxParams != null && row.params_billions > numericMaxParams) return false
+      return true
+    })
     if (query) {
       filtered = filtered.filter((row) => {
           return b.benchmarks_count - a.benchmarks_count
       }
     })
+  }, [evaluations, deferredSearchQuery, modelSortBy, numericMinParams, numericMaxParams, showUnknownSize])
   // Developers — filter + sort
   const sortedDevelopers = useMemo(() => {
   // Reset visible window when filter/sort changes
   useEffect(() => {
     setVisibleCount(PAGE_SIZE)
+  }, [groupByDeveloper, modelSortBy, developerSortBy, deferredSearchQuery, minParamStep, maxParamStep, showUnknownSize])
   const totalCount = groupByDeveloper ? sortedDevelopers.length : sortedEvaluations.length
   const visibleEvaluations = useMemo(
             />
           </div>
           <select
             className="ec-select ml-auto shrink-0"
             value={groupByDeveloper ? developerSortBy : modelSortBy}
           </select>
         </div>
+        {/* PARAM RANGE — its own row so the rail has room to breathe.
+            Sharing the toolbar with stats / search / sort left it too cramped. */}
+        {!groupByDeveloper && (
+          <div className="mb-6 -mt-2">
+            <ParamRangePicker
+              variant="inline"
+              headline="Params"
+              minStep={minParamStep}
+              maxStep={maxParamStep}
+              onMinChange={setMinParamStep}
+              onMaxChange={setMaxParamStep}
+              onReset={() => {
+                setMinParamStep(0)
+                setMaxParamStep(PARAM_RANGE_MAX_INDEX)
+              }}
+              showUnknownSize={showUnknownSize}
+              onShowUnknownSizeChange={setShowUnknownSize}
+            />
+          </div>
+        )}
         {/* TABLE ---------------------------------------------------- */}
         {loading ? (
           <div className="py-24 text-center font-mono text-[11px] uppercase tracking-[0.2em] text-[color:var(--fg-subtle)]">

app/page.tsx CHANGED Viewed

@@ -244,7 +244,7 @@ export default async function HomePage() {
               <p className="mx-auto mt-2 max-w-2xl text-sm leading-6 text-[color:var(--fg-muted)]">
                 The current backend snapshot does not include{" "}
                 <code className="rounded-sm bg-[color:var(--bg-surface)] px-1.5 py-0.5 font-mono text-xs">
-                  corpus-aggregates.json
                 </code>
                 . When it does, this section will render the four corpus-level rollups.
               </p>

               <p className="mx-auto mt-2 max-w-2xl text-sm leading-6 text-[color:var(--fg-muted)]">
                 The current backend snapshot does not include{" "}
                 <code className="rounded-sm bg-[color:var(--bg-surface)] px-1.5 py-0.5 font-mono text-xs">
+                  headline.json
                 </code>
                 . When it does, this section will render the four corpus-level rollups.
               </p>

components/benchmark-detail.tsx CHANGED Viewed

@@ -4205,10 +4205,10 @@ function SampleDataDialog({
   return (
     <>
-      <Button variant="outline" size="sm" className="gap-2" onClick={handleOpenToggle}>
-        <Database className="h-4 w-4" />
         {open ? "Hide instances" : "View all instances"}
-      </Button>
       {open && (
       <div className="rounded-xl border bg-background p-4 space-y-3">
         <div>

   return (
     <>
+      <button type="button" className="btn-ec outline inline-flex items-center gap-2" onClick={handleOpenToggle}>
+        <Database className="h-3.5 w-3.5" />
         {open ? "Hide instances" : "View all instances"}
+      </button>
       {open && (
       <div className="rounded-xl border bg-background p-4 space-y-3">
         <div>

components/eval-detail.tsx CHANGED Viewed

@@ -3,14 +3,20 @@
 import { useAudienceMode } from "@/components/audience-mode-provider"
 import { Fragment, useEffect, useMemo, useState } from "react"
 import Link from "next/link"
 import { CompletenessPanel } from "@/components/signals/completeness-panel"
 import { ComparabilityPanel } from "@/components/signals/comparability-panel"
-import { ReproducibilityPanel } from "@/components/signals/reproducibility-panel"
 import { SignalsRowBadges } from "@/components/signals/signals-row-badges"
-import { RowSignalsCompact } from "@/components/signals/row-signals-compact"
 import { getCompletenessPopulatedCount } from "@/components/signals/signal-utils"
 import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/components/ui/collapsible"
 import { ScoreDistribution } from "@/components/score-distribution"
 import {
   Dialog,
   DialogContent,
@@ -264,107 +270,6 @@ function SliceSelector({
   )
 }
-const PARAM_RANGE_VALUES = [1, 2, 3, 4, 6, 8, 10, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 500] as const
-const PARAM_RANGE_MARKERS = [
-  { label: "< 1B", step: 0 },
-  { label: "6B", step: PARAM_RANGE_VALUES.indexOf(6) },
-  { label: "12B", step: PARAM_RANGE_VALUES.indexOf(12) },
-  { label: "32B", step: PARAM_RANGE_VALUES.indexOf(32) },
-  { label: "128B", step: PARAM_RANGE_VALUES.indexOf(128) },
-  { label: "> 500B", step: PARAM_RANGE_VALUES.length - 1 },
-] as const
-function formatParamBoundLabel(step: number, bound: "min" | "max") {
-  const maxStepIndex = PARAM_RANGE_VALUES.length - 1
-  if (bound === "min" && step <= 0) {
-    return "< 1B"
-  }
-  if (bound === "max" && step >= maxStepIndex) {
-    return "> 500B"
-  }
-  const value = PARAM_RANGE_VALUES[step]
-  return value != null ? `${value}B` : "Not reported"
-}
-function parseParamsBillionsFromText(value: string | null | undefined) {
-  if (!value) {
-    return null
-  }
-  const normalized = value.trim().toLowerCase()
-  if (!normalized) {
-    return null
-  }
-  const compact = normalized.replace(/,/g, "")
-  const tokenMatch = compact.match(/(\d+(?:\.\d+)?)\s*(trillion|tn|t|billion|bn|b|million|mn|m|thousand|k)\b/)
-  if (tokenMatch) {
-    const amount = Number.parseFloat(tokenMatch[1])
-    if (!Number.isFinite(amount)) {
-      return null
-    }
-    const unit = tokenMatch[2]
-    if (unit === "trillion" || unit === "tn" || unit === "t") {
-      return amount * 1000
-    }
-    if (unit === "billion" || unit === "bn" || unit === "b") {
-      return amount
-    }
-    if (unit === "million" || unit === "mn" || unit === "m") {
-      return amount / 1000
-    }
-    if (unit === "thousand" || unit === "k") {
-      return amount / 1_000_000
-    }
-  }
-  const numeric = Number.parseFloat(compact)
-  return Number.isFinite(numeric) ? numeric : null
-}
-function parseParamsBillionsFromModelName(modelName: string | null | undefined) {
-  if (!modelName) {
-    return null
-  }
-  const sizeTokens = Array.from(modelName.matchAll(/\b(\d+(?:\.\d+)?)\s*([tmbk])\b/gi))
-  if (sizeTokens.length === 0) {
-    return null
-  }
-  const lastToken = sizeTokens[sizeTokens.length - 1]
-  const numericValue = Number.parseFloat(lastToken[1])
-  if (!Number.isFinite(numericValue)) {
-    return null
-  }
-  const unit = lastToken[2].toLowerCase()
-  if (unit === "t") {
-    return numericValue * 1000
-  }
-  if (unit === "b") {
-    return numericValue
-  }
-  if (unit === "m") {
-    return numericValue / 1000
-  }
-  if (unit === "k") {
-    return numericValue / 1_000_000
-  }
-  return null
-}
 function getParamsBillionsFromModelInfo(modelInfo: ModelResultForBenchmark["model_info"]) {
   const additionalDetails = modelInfo.additional_details
   const rawParamsBillions =
@@ -568,32 +473,16 @@ export function EvalDetail({ summary }: EvalDetailProps) {
   const [expandedRows, setExpandedRows] = useState<Record<string, boolean>>({})
   const [leaderboardPage, setLeaderboardPage] = useState(1)
   const [minParamStep, setMinParamStep] = useState(0)
-  const [maxParamStep, setMaxParamStep] = useState(PARAM_RANGE_VALUES.length - 1)
   const maxScore = summary.metric_config.max_score ?? 1
   const minScore = summary.metric_config.min_score ?? 0
   const range = maxScore - minScore
   const normalizeScore = (raw: number) => (range > 0 ? (raw - minScore) / range : raw)
-  const maxParamStepIndex = PARAM_RANGE_VALUES.length - 1
-  const minHandlePercent = (minParamStep / maxParamStepIndex) * 100
-  const maxHandlePercent = (maxParamStep / maxParamStepIndex) * 100
-  const numericMinParams = useMemo(() => {
-    if (minParamStep <= 0) {
-      return null
-    }
-    return PARAM_RANGE_VALUES[minParamStep] ?? null
-  }, [minParamStep])
-  const numericMaxParams = useMemo(() => {
-    if (maxParamStep >= PARAM_RANGE_VALUES.length - 1) {
-      return null
-    }
-    return PARAM_RANGE_VALUES[maxParamStep] ?? null
-  }, [maxParamStep])
   const sortedResults = useMemo(
     () =>
@@ -603,6 +492,8 @@ export function EvalDetail({ summary }: EvalDetailProps) {
     [summary.model_results, summary.metric_config.lower_is_better]
   )
   const hasParameterData = useMemo(
     () => sortedResults.some((result) => getParamsBillions(result) != null),
     [sortedResults]
@@ -612,17 +503,13 @@ export function EvalDetail({ summary }: EvalDetailProps) {
     return sortedResults.filter((modelResult) => {
       const paramsBillions = getParamsBillions(modelResult)
-      if (numericMinParams != null && (paramsBillions == null || paramsBillions < numericMinParams)) {
-        return false
-      }
-      if (numericMaxParams != null && (paramsBillions == null || paramsBillions > numericMaxParams)) {
-        return false
-      }
       return true
     })
-  }, [numericMaxParams, numericMinParams, sortedResults])
   const leaderboardRows = useMemo<LeaderboardRow[]>(() => {
     let currentRank = 0
@@ -675,10 +562,6 @@ export function EvalDetail({ summary }: EvalDetailProps) {
       [key]: !current[key],
     }))
-  const evalKindLabel = summary.is_aggregated
-    ? (isResearchView ? "Composite · §3.2" : "Benchmark suite")
-    : (isResearchView ? "Single benchmark" : "Benchmark")
   const headerOrg = summary.composite_benchmark_name && summary.composite_benchmark_name !== summary.evaluation_name
     ? summary.composite_benchmark_name
     : null
@@ -693,10 +576,9 @@ export function EvalDetail({ summary }: EvalDetailProps) {
     <div className="space-y-12">
       {/* HERO — paper §3.1 ------------------------------------------------ */}
       <header className="motion-academic-enter">
-        <div className="kicker kicker-accent mb-2">{evalKindLabel}</div>
         <h1
           className="font-bold tracking-[-0.025em]"
-          style={{ fontSize: "clamp(40px, 5vw, 60px)", lineHeight: 1.04, margin: "8px 0 12px" }}
         >
           {summary.evaluation_name}
         </h1>
@@ -790,6 +672,9 @@ export function EvalDetail({ summary }: EvalDetailProps) {
         <CollapsibleContent className="mt-3">
           <div className="space-y-4">
             {/* Metric spec / nested datalist (paper-aligned hairline def-list) */}
             <div className="ec-card warm" style={{ padding: "18px 22px" }}>
               <div className="kicker mb-3">
@@ -992,96 +877,27 @@ export function EvalDetail({ summary }: EvalDetailProps) {
             </div>
           )}
-          <div className="ec-card" style={{ padding: 0, overflow: "hidden" }}>
-            {hasParameterData && (
-              <div
-                style={{
-                  borderBottom: "1px solid var(--border-soft)",
-                  background: "var(--bg-warm)",
-                  padding: "16px 20px",
                 }}
-              >
-                <div className="flex flex-col gap-3 lg:flex-row lg:items-center lg:justify-between">
-                  <div className="space-y-1">
-                    <div className="text-[11px] font-semibold uppercase tracking-[0.18em] text-muted-foreground">
-                      Parameter range
-                    </div>
-                    <div className="text-sm text-muted-foreground">
-                      Narrow the leaderboard to comparable model sizes.
-                    </div>
-                  </div>
-                  <div className="flex min-w-0 flex-1 items-center gap-4 lg:max-w-[40rem]">
-                    <div className="min-w-0 flex-1">
-                      <div className="mb-2 flex items-center justify-between text-[10px] font-medium uppercase tracking-[0.14em] text-muted-foreground">
-                        {PARAM_RANGE_MARKERS.map((marker) => (
-                          <span key={marker.label} className="text-center">
-                            {marker.label}
-                          </span>
-                        ))}
-                      </div>
-                      <div className="relative h-4">
-                        <div className="absolute inset-x-1.5 top-1/2 h-[3px] -translate-y-1/2 rounded-full bg-border/80" />
-                        <div className="absolute inset-x-1.5 top-1/2 h-[3px] -translate-y-1/2">
-                          <div
-                            className="absolute inset-y-0 rounded-full bg-foreground"
-                            style={{
-                              left: `${minHandlePercent}%`,
-                              right: `${Math.max(100 - maxHandlePercent, 0)}%`,
-                            }}
-                          />
-                        </div>
-                        <div className="absolute inset-x-1.5 top-1/2 -translate-y-1/2">
-                          {PARAM_RANGE_VALUES.map((_, stepIndex) => (
-                            <span
-                              key={`param-tick-${stepIndex}`}
-                              className="absolute top-0 h-2 w-px -translate-x-1/2 rounded-full bg-border"
-                              style={{ left: `${(stepIndex / maxParamStepIndex) * 100}%` }}
-                              aria-hidden="true"
-                            />
-                          ))}
-                        </div>
-                        <input
-                          type="range"
-                          min={0}
-                          max={maxParamStepIndex}
-                          step={1}
-                          value={minParamStep}
-                          onChange={(event) => {
-                            const nextMin = Number(event.target.value)
-                            setMinParamStep(Math.min(nextMin, maxParamStep))
-                          }}
-                          className="param-range-input"
-                          aria-label="Minimum parameter filter"
-                        />
-                        <input
-                          type="range"
-                          min={0}
-                          max={maxParamStepIndex}
-                          step={1}
-                          value={maxParamStep}
-                          onChange={(event) => {
-                            const nextMax = Number(event.target.value)
-                            setMaxParamStep(Math.max(nextMax, minParamStep))
-                          }}
-                          className="param-range-input"
-                          aria-label="Maximum parameter filter"
-                        />
-                      </div>
-                    </div>
-                    <span className="shrink-0 text-[11px] text-muted-foreground">
-                      {formatParamBoundLabel(minParamStep, "min")} to {formatParamBoundLabel(maxParamStep, "max")}
-                    </span>
-                  </div>
-                </div>
-              </div>
-            )}
             <div className="overflow-x-auto">
             <table className="ec-htable" style={{ minWidth: 980 }}>
               <thead>
@@ -1204,7 +1020,6 @@ export function EvalDetail({ summary }: EvalDetailProps) {
                                   Avg of {modelResult.aggregate_components.length}
                                 </div>
                               )}
-                              <RowSignalsCompact annotations={rowAnnotations} className="mt-1" />
                             </div>
                           </div>
                         </td>
@@ -1375,10 +1190,6 @@ export function EvalDetail({ summary }: EvalDetailProps) {
                                   )}
                                 </DetailPanel>
-                                {!isResearchView && (
-                                  <ReproducibilityPanel gap={rowAnnotations?.reproducibility_gap} />
-                                )}
                                 <DetailPanel
                                   title={isResearchView ? "Score Breakdown" : "Metric Summary"}
                                   subtitle={
@@ -1636,11 +1447,19 @@ function MultiMetricLeaderboard({
   isResearchView: boolean
 }) {
   const [page, setPage] = useState(1)
-  const [sortKey, setSortKey] = useState<string>("coverage")
   const [sortDirection, setSortDirection] = useState<"asc" | "desc">("desc")
   const [activeSubtaskTab, setActiveSubtaskTab] = useState<string>("all")
   const [minParamStep, setMinParamStep] = useState(0)
-  const [maxParamStep, setMaxParamStep] = useState(PARAM_RANGE_VALUES.length - 1)
   const [expandedRows, setExpandedRows] = useState<Record<string, boolean>>({})
   // Index ModelResultForBenchmark entries by model_info.id so we can power the
@@ -1678,7 +1497,6 @@ function MultiMetricLeaderboard({
     [allMetricKeys]
   )
   const [visibleMetricKeys, setVisibleMetricKeys] = useState<string[]>(() => defaultVisibleMetricKeys)
-  const maxParamStepIndex = PARAM_RANGE_VALUES.length - 1
   const leaderboardMetricMap = useMemo(
     () => new Map(leaderboardMetrics.map((metric) => [metric.column_key, metric])),
     [leaderboardMetrics]
@@ -1725,38 +1543,26 @@ function MultiMetricLeaderboard({
     [visibleMetrics]
   )
-  const numericMinParams = useMemo(() => {
-    if (minParamStep <= 0) {
-      return null
-    }
-    return PARAM_RANGE_VALUES[minParamStep] ?? null
-  }, [minParamStep])
-  const numericMaxParams = useMemo(() => {
-    if (maxParamStep >= PARAM_RANGE_VALUES.length - 1) {
-      return null
-    }
-    return PARAM_RANGE_VALUES[maxParamStep] ?? null
-  }, [maxParamStep])
   const filteredRows = useMemo(() => {
-    return leaderboardRows
-      .filter((row) => {
-        const paramsBillions = getParamsBillionsFromModelInfo(row.model_info)
-        if (numericMinParams != null && (paramsBillions == null || paramsBillions < numericMinParams)) {
-          return false
-        }
-        if (numericMaxParams != null && (paramsBillions == null || paramsBillions > numericMaxParams)) {
-          return false
-        }
-        return true
-      })
-  }, [leaderboardRows, numericMaxParams, numericMinParams])
   const sortedRows = useMemo(() => {
     const rows = [...filteredRows]
@@ -1789,11 +1595,6 @@ function MultiMetricLeaderboard({
         return sortDirection === "asc" ? comparison : -comparison
       }
-      if (sortKey === "coverage") {
-        const comparison = left.metrics_present - right.metrics_present || compareNames(left, right)
-        return sortDirection === "asc" ? comparison : -comparison
-      }
       if (sortKey === "updated") {
         const comparison = compareTimestamps(left.evaluation_timestamp, right.evaluation_timestamp) || compareNames(left, right)
         return sortDirection === "asc" ? comparison : -comparison
@@ -1836,10 +1637,19 @@ function MultiMetricLeaderboard({
   useEffect(() => {
     if (leaderboardMetricMap.has(sortKey) && !visibleMetricColumnKeySet.has(sortKey)) {
-      setSortKey("coverage")
       setSortDirection("desc")
     }
-  }, [leaderboardMetricMap, sortKey, visibleMetricColumnKeySet])
   useEffect(() => {
     if (!hasSubtaskTabs) {
@@ -1858,11 +1668,6 @@ function MultiMetricLeaderboard({
     }
   }, [activeSubtaskTab, hasSubtaskTabs, singleMetricSubtaskTabs])
-  const hasParameterData = useMemo(
-    () => leaderboardRows.some((row) => getParamsBillionsFromModelInfo(row.model_info) != null),
-    [leaderboardRows]
-  )
   const pagedRows = useMemo(
     () => sortedRows.slice(0, page * 50),
     [page, sortedRows]
@@ -1883,18 +1688,12 @@ function MultiMetricLeaderboard({
     })
   }
-  const getVisibleMetricCount = (row: LeaderboardMatrixRow) =>
-    visibleMetrics.reduce(
-      (count, metric) => count + (isNumericScore(row.values[metric.column_key]) ? 1 : 0),
-      0
-    )
   const getDefaultSortDirection = (key: string): "asc" | "desc" => {
     if (key === "model" || key === "developer") {
       return "asc"
     }
-    if (key === "updated" || key === "coverage") {
       return "desc"
     }
@@ -2033,84 +1832,21 @@ function MultiMetricLeaderboard({
         {hasParameterData && (
           <div className="border-b bg-background px-5 py-4 sm:px-6">
-            <div className="flex flex-col gap-3 lg:flex-row lg:items-center lg:justify-between">
-              <div className="space-y-1">
-                <div className="text-[11px] font-semibold uppercase tracking-[0.18em] text-muted-foreground">
-                  Parameter range
-                </div>
-                <div className="text-sm text-muted-foreground">
-                  Narrow the matrix to comparable model sizes.
-                </div>
-              </div>
-              <div className="flex min-w-0 flex-1 items-center gap-4 lg:max-w-[40rem]">
-                <div className="min-w-0 flex-1">
-                  <div className="mb-2 flex items-center justify-between text-[10px] font-medium uppercase tracking-[0.14em] text-muted-foreground">
-                    {PARAM_RANGE_MARKERS.map((marker) => (
-                      <span key={marker.label} className="text-center">
-                        {marker.label}
-                      </span>
-                    ))}
-                  </div>
-                  <div className="relative h-4">
-                    <div className="absolute inset-x-1.5 top-1/2 h-[3px] -translate-y-1/2 rounded-full bg-border/80" />
-                    <div className="absolute inset-x-1.5 top-1/2 h-[3px] -translate-y-1/2">
-                      <div
-                        className="absolute inset-y-0 rounded-full bg-foreground"
-                        style={{
-                          left: `${(minParamStep / maxParamStepIndex) * 100}%`,
-                          right: `${Math.max(100 - (maxParamStep / maxParamStepIndex) * 100, 0)}%`,
-                        }}
-                      />
-                    </div>
-                    <div className="absolute inset-x-1.5 top-1/2 -translate-y-1/2">
-                      {PARAM_RANGE_VALUES.map((_, stepIndex) => (
-                        <span
-                          key={`param-matrix-tick-${stepIndex}`}
-                          className="absolute top-0 h-2 w-px -translate-x-1/2 rounded-full bg-border"
-                          style={{ left: `${(stepIndex / maxParamStepIndex) * 100}%` }}
-                          aria-hidden="true"
-                        />
-                      ))}
-                    </div>
-                    <input
-                      type="range"
-                      min={0}
-                      max={maxParamStepIndex}
-                      step={1}
-                      value={minParamStep}
-                      onChange={(event) => {
-                        const nextMin = Number(event.target.value)
-                        setMinParamStep(Math.min(nextMin, maxParamStep))
-                      }}
-                      className="param-range-input"
-                      aria-label="Minimum parameter filter"
-                    />
-                    <input
-                      type="range"
-                      min={0}
-                      max={maxParamStepIndex}
-                      step={1}
-                      value={maxParamStep}
-                      onChange={(event) => {
-                        const nextMax = Number(event.target.value)
-                        setMaxParamStep(Math.max(nextMax, minParamStep))
-                      }}
-                      className="param-range-input"
-                      aria-label="Maximum parameter filter"
-                    />
-                  </div>
-                </div>
-                <span className="shrink-0 text-[11px] text-muted-foreground">
-                  {formatParamBoundLabel(minParamStep, "min")} to {formatParamBoundLabel(maxParamStep, "max")}
-                </span>
-              </div>
-            </div>
           </div>
         )}
@@ -2135,13 +1871,6 @@ function MultiMetricLeaderboard({
                   {isResearchView ? "Developer" : "Provider"}
                   {getSortIndicator("developer")}
                 </th>
-                <th
-                  className="num"
-                  style={{ width: 110, cursor: "pointer" }}
-                  onClick={() => handleSort("coverage")}
-                >
-                  Coverage{getSortIndicator("coverage")}
-                </th>
                 {visibleMetrics.map((metric) => {
                   const showSubtaskTopline =
                     !hasSubtaskTabs &&
@@ -2248,10 +1977,6 @@ function MultiMetricLeaderboard({
                         >
                           {row.model_info.developer ?? "Unknown developer"}
                         </div>
-                        <RowSignalsCompact
-                          annotations={getRowLevelAnnotations(row, visibleMetrics)}
-                          className="mt-1"
-                        />
                       </div>
                     </div>
                   </td>
@@ -2262,11 +1987,6 @@ function MultiMetricLeaderboard({
                     </div>
                   </td>
-                  <td className="num align-top tabular-nums" style={{ fontSize: 13, fontWeight: 600 }}>
-                    {getVisibleMetricCount(row)}
-                    <span style={{ color: "var(--fg-subtle)", fontWeight: 400 }}>/{visibleMetrics.length}</span>
-                  </td>
                   {visibleMetrics.map((metric) => {
                     const score = row.values[metric.column_key]
                     const annotations = row.annotations_by_metric?.[metric.column_key]
@@ -2561,23 +2281,40 @@ function BenchmarkCardPanel({
   const license = ethical.data_licensing ?? ""
   const shortLicense = license && license !== "Not specified" ? license : null
   return (
     <div className="ec-card" style={{ padding: 0, overflow: "hidden" }}>
-      <div
-        style={{
-          padding: "16px 20px",
-          background: "var(--bg-warm)",
-          borderBottom: "1px solid var(--border-soft)",
-        }}
-      >
-        <div className="flex flex-wrap items-center gap-3 mb-1.5">
-          <BookOpen className="h-4 w-4" style={{ color: "var(--fg-muted)" }} />
-          <span className="kicker kicker-fg" style={{ fontSize: 12, letterSpacing: "0.16em" }}>
-            Benchmark Card
-          </span>
-          {shortLicense && (
-            <span className="ec-tag outline">{shortLicense}</span>
-          )}
           {(flaggedFields.length > 0 || missingFields.length > 0) && (
             <span
               className="font-mono inline-flex items-center gap-1"
@@ -2596,34 +2333,11 @@ function BenchmarkCardPanel({
             </span>
           )}
         </div>
-        <div className="text-[12px]" style={{ color: "var(--fg-muted)" }}>
-          Structured metadata about this benchmark: what it measures, how it was built, and known limitations.
-        </div>
-      </div>
       <div className="space-y-6 p-5 sm:p-6">
         {knownIssues.length > 0 && <KnownIssuesPanel issues={knownIssues} variant="full" />}
-        {/* Overview + domains */}
-        <div className="space-y-3">
-          <p className="text-sm leading-6 text-muted-foreground">{details.overview}</p>
-          <div className="flex flex-wrap gap-2">
-            {domains.map((d) => (
-              <span key={d} className="ec-tag outline">
-                <Tag className="h-3 w-3 shrink-0" />
-                {d}
-              </span>
-            ))}
-            {languages.map((l) => (
-              <span key={l} className="ec-tag outline">
-                <Globe className="h-3 w-3 shrink-0" />
-                {l}
-              </span>
-            ))}
-          </div>
-        </div>
         <div className="grid gap-3 sm:grid-cols-2 xl:grid-cols-3">
           {/* Goal */}
           <div

 import { useAudienceMode } from "@/components/audience-mode-provider"
 import { Fragment, useEffect, useMemo, useState } from "react"
 import Link from "next/link"
+import { BenchmarkSignalsStrip } from "@/components/signals/benchmark-signals-strip"
 import { CompletenessPanel } from "@/components/signals/completeness-panel"
 import { ComparabilityPanel } from "@/components/signals/comparability-panel"
 import { SignalsRowBadges } from "@/components/signals/signals-row-badges"
 import { getCompletenessPopulatedCount } from "@/components/signals/signal-utils"
 import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/components/ui/collapsible"
 import { ScoreDistribution } from "@/components/score-distribution"
+import { ParamRangePicker } from "@/components/param-range-picker"
+import {
+  PARAM_RANGE_MAX_INDEX,
+  paramStepToNumeric,
+  parseParamsBillionsFromText,
+  parseParamsBillionsFromModelName,
+} from "@/lib/param-range"
 import {
   Dialog,
   DialogContent,
   )
 }
 function getParamsBillionsFromModelInfo(modelInfo: ModelResultForBenchmark["model_info"]) {
   const additionalDetails = modelInfo.additional_details
   const rawParamsBillions =
   const [expandedRows, setExpandedRows] = useState<Record<string, boolean>>({})
   const [leaderboardPage, setLeaderboardPage] = useState(1)
   const [minParamStep, setMinParamStep] = useState(0)
+  const [maxParamStep, setMaxParamStep] = useState(PARAM_RANGE_MAX_INDEX)
   const maxScore = summary.metric_config.max_score ?? 1
   const minScore = summary.metric_config.min_score ?? 0
   const range = maxScore - minScore
   const normalizeScore = (raw: number) => (range > 0 ? (raw - minScore) / range : raw)
+  const numericMinParams = useMemo(() => paramStepToNumeric(minParamStep, "min"), [minParamStep])
+  const numericMaxParams = useMemo(() => paramStepToNumeric(maxParamStep, "max"), [maxParamStep])
   const sortedResults = useMemo(
     () =>
     [summary.model_results, summary.metric_config.lower_is_better]
   )
+  const [showUnknownSize, setShowUnknownSize] = useState(true)
   const hasParameterData = useMemo(
     () => sortedResults.some((result) => getParamsBillions(result) != null),
     [sortedResults]
     return sortedResults.filter((modelResult) => {
       const paramsBillions = getParamsBillions(modelResult)
+      if (paramsBillions == null) return showUnknownSize
+      if (numericMinParams != null && paramsBillions < numericMinParams) return false
+      if (numericMaxParams != null && paramsBillions > numericMaxParams) return false
       return true
     })
+  }, [numericMaxParams, numericMinParams, showUnknownSize, sortedResults])
   const leaderboardRows = useMemo<LeaderboardRow[]>(() => {
     let currentRank = 0
       [key]: !current[key],
     }))
   const headerOrg = summary.composite_benchmark_name && summary.composite_benchmark_name !== summary.evaluation_name
     ? summary.composite_benchmark_name
     : null
     <div className="space-y-12">
       {/* HERO — paper §3.1 ------------------------------------------------ */}
       <header className="motion-academic-enter">
         <h1
           className="font-bold tracking-[-0.025em]"
+          style={{ fontSize: "clamp(40px, 5vw, 60px)", lineHeight: 1.04, margin: "0 0 12px" }}
         >
           {summary.evaluation_name}
         </h1>
         <CollapsibleContent className="mt-3">
           <div className="space-y-4">
+            {/* Four interpretive signals (paper §4.2.1), benchmark-level. */}
+            <BenchmarkSignalsStrip summary={summary} />
             {/* Metric spec / nested datalist (paper-aligned hairline def-list) */}
             <div className="ec-card warm" style={{ padding: "18px 22px" }}>
               <div className="kicker mb-3">
             </div>
           )}
+          {hasParameterData && (
+            <div className="mb-4">
+              <ParamRangePicker
+                variant="promo"
+                headline="Parameter range"
+                subline="Narrow the leaderboard to comparable model sizes."
+                minStep={minParamStep}
+                maxStep={maxParamStep}
+                onMinChange={setMinParamStep}
+                onMaxChange={setMaxParamStep}
+                onReset={() => {
+                  setMinParamStep(0)
+                  setMaxParamStep(PARAM_RANGE_MAX_INDEX)
                 }}
+                showUnknownSize={showUnknownSize}
+                onShowUnknownSizeChange={setShowUnknownSize}
+              />
+            </div>
+          )}
+          <div className="ec-card" style={{ padding: 0, overflow: "hidden" }}>
             <div className="overflow-x-auto">
             <table className="ec-htable" style={{ minWidth: 980 }}>
               <thead>
                                   Avg of {modelResult.aggregate_components.length}
                                 </div>
                               )}
                             </div>
                           </div>
                         </td>
                                   )}
                                 </DetailPanel>
                                 <DetailPanel
                                   title={isResearchView ? "Score Breakdown" : "Metric Summary"}
                                   subtitle={
   isResearchView: boolean
 }) {
   const [page, setPage] = useState(1)
+  // Default sort: the first root-scope metric (the benchmark's overall
+  // score), falling back to the first metric overall, then to model name.
+  // We don't sort by metric coverage by default — coverage tells you how
+  // many slices reported, not how the model performed.
+  const [sortKey, setSortKey] = useState<string>(() => {
+    const metrics = summary.leaderboard_metrics ?? []
+    const root = metrics.find((m) => m.scope === "root")
+    return root?.column_key ?? metrics[0]?.column_key ?? "model"
+  })
   const [sortDirection, setSortDirection] = useState<"asc" | "desc">("desc")
   const [activeSubtaskTab, setActiveSubtaskTab] = useState<string>("all")
   const [minParamStep, setMinParamStep] = useState(0)
+  const [maxParamStep, setMaxParamStep] = useState(PARAM_RANGE_MAX_INDEX)
   const [expandedRows, setExpandedRows] = useState<Record<string, boolean>>({})
   // Index ModelResultForBenchmark entries by model_info.id so we can power the
     [allMetricKeys]
   )
   const [visibleMetricKeys, setVisibleMetricKeys] = useState<string[]>(() => defaultVisibleMetricKeys)
   const leaderboardMetricMap = useMemo(
     () => new Map(leaderboardMetrics.map((metric) => [metric.column_key, metric])),
     [leaderboardMetrics]
     [visibleMetrics]
   )
+  const numericMinParams = useMemo(() => paramStepToNumeric(minParamStep, "min"), [minParamStep])
+  const numericMaxParams = useMemo(() => paramStepToNumeric(maxParamStep, "max"), [maxParamStep])
+  const [showUnknownSize, setShowUnknownSize] = useState(true)
+  const hasParameterData = useMemo(
+    () => leaderboardRows.some((row) => getParamsBillionsFromModelInfo(row.model_info) != null),
+    [leaderboardRows]
+  )
   const filteredRows = useMemo(() => {
+    return leaderboardRows.filter((row) => {
+      const paramsBillions = getParamsBillionsFromModelInfo(row.model_info)
+      if (paramsBillions == null) return showUnknownSize
+      if (numericMinParams != null && paramsBillions < numericMinParams) return false
+      if (numericMaxParams != null && paramsBillions > numericMaxParams) return false
+      return true
+    })
+  }, [leaderboardRows, numericMaxParams, numericMinParams, showUnknownSize])
   const sortedRows = useMemo(() => {
     const rows = [...filteredRows]
         return sortDirection === "asc" ? comparison : -comparison
       }
       if (sortKey === "updated") {
         const comparison = compareTimestamps(left.evaluation_timestamp, right.evaluation_timestamp) || compareNames(left, right)
         return sortDirection === "asc" ? comparison : -comparison
   useEffect(() => {
     if (leaderboardMetricMap.has(sortKey) && !visibleMetricColumnKeySet.has(sortKey)) {
+      // The currently-sorted metric was hidden — fall back to the first
+      // visible root-scope metric, then the first visible metric overall,
+      // then to the model name.
+      const visibleRoot = leaderboardMetrics.find(
+        (m) => m.scope === "root" && visibleMetricColumnKeySet.has(m.column_key),
+      )
+      const fallback = visibleRoot?.column_key
+        ?? leaderboardMetrics.find((m) => visibleMetricColumnKeySet.has(m.column_key))?.column_key
+        ?? "model"
+      setSortKey(fallback)
       setSortDirection("desc")
     }
+  }, [leaderboardMetricMap, leaderboardMetrics, sortKey, visibleMetricColumnKeySet])
   useEffect(() => {
     if (!hasSubtaskTabs) {
     }
   }, [activeSubtaskTab, hasSubtaskTabs, singleMetricSubtaskTabs])
   const pagedRows = useMemo(
     () => sortedRows.slice(0, page * 50),
     [page, sortedRows]
     })
   }
   const getDefaultSortDirection = (key: string): "asc" | "desc" => {
     if (key === "model" || key === "developer") {
       return "asc"
     }
+    if (key === "updated") {
       return "desc"
     }
         {hasParameterData && (
           <div className="border-b bg-background px-5 py-4 sm:px-6">
+            <ParamRangePicker
+              variant="promo"
+              headline="Parameter range"
+              subline="Narrow the matrix to comparable model sizes."
+              minStep={minParamStep}
+              maxStep={maxParamStep}
+              onMinChange={setMinParamStep}
+              onMaxChange={setMaxParamStep}
+              onReset={() => {
+                setMinParamStep(0)
+                setMaxParamStep(PARAM_RANGE_MAX_INDEX)
+              }}
+              showUnknownSize={showUnknownSize}
+              onShowUnknownSizeChange={setShowUnknownSize}
+            />
           </div>
         )}
                   {isResearchView ? "Developer" : "Provider"}
                   {getSortIndicator("developer")}
                 </th>
                 {visibleMetrics.map((metric) => {
                   const showSubtaskTopline =
                     !hasSubtaskTabs &&
                         >
                           {row.model_info.developer ?? "Unknown developer"}
                         </div>
                       </div>
                     </div>
                   </td>
                     </div>
                   </td>
                   {visibleMetrics.map((metric) => {
                     const score = row.values[metric.column_key]
                     const annotations = row.annotations_by_metric?.[metric.column_key]
   const license = ethical.data_licensing ?? ""
   const shortLicense = license && license !== "Not specified" ? license : null
+  // The outer collapsible trigger names the panel; the prominent top
+  // strip surfaces what readers most often want at a glance — domain
+  // and language tags, license, and any flagged/missing-field badge.
+  const hasChipStrip =
+    domains.length > 0 ||
+    languages.length > 0 ||
+    Boolean(shortLicense) ||
+    flaggedFields.length > 0 ||
+    missingFields.length > 0
   return (
     <div className="ec-card" style={{ padding: 0, overflow: "hidden" }}>
+      {hasChipStrip && (
+        <div
+          className="flex flex-wrap items-center gap-2"
+          style={{
+            padding: "10px 20px",
+            background: "var(--bg-warm)",
+            borderBottom: "1px solid var(--border-soft)",
+          }}
+        >
+          {domains.map((d) => (
+            <span key={`d-${d}`} className="ec-tag outline">
+              <Tag className="h-3 w-3 shrink-0" />
+              {d}
+            </span>
+          ))}
+          {languages.map((l) => (
+            <span key={`l-${l}`} className="ec-tag outline">
+              <Globe className="h-3 w-3 shrink-0" />
+              {l}
+            </span>
+          ))}
+          {shortLicense && <span className="ec-tag outline">{shortLicense}</span>}
           {(flaggedFields.length > 0 || missingFields.length > 0) && (
             <span
               className="font-mono inline-flex items-center gap-1"
             </span>
           )}
         </div>
+      )}
       <div className="space-y-6 p-5 sm:p-6">
         {knownIssues.length > 0 && <KnownIssuesPanel issues={knownIssues} variant="full" />}
         <div className="grid gap-3 sm:grid-cols-2 xl:grid-cols-3">
           {/* Goal */}
           <div

components/family-table.tsx CHANGED Viewed

@@ -5,17 +5,16 @@ import { useRouter } from "next/navigation"
 import { ArrowUpRight, ChevronDown, ChevronRight } from "lucide-react"
 import type { HierarchyFamily, HierarchyLeaf } from "@/lib/backend-artifacts"
-import type { CategoryType } from "@/lib/benchmark-schema"
-const CATEGORY_DOT: Record<string, string> = {
-  General: "bg-sky-400",
-  Reasoning: "bg-violet-400",
-  Agentic: "bg-amber-400",
-  Safety: "bg-rose-400",
-  Code: "bg-emerald-400",
-  Math: "bg-indigo-400",
-  Multilingual: "bg-teal-400",
-}
 const LEAVES_INLINE_MIN = 2
 const LEAVES_INLINE_MAX = 50
@@ -23,89 +22,172 @@ const LEAVES_INLINE_MAX = 50
 interface FamilyTableProps {
   families: HierarchyFamily[]
   totalModels: number
 }
 function slugify(value: string | null | undefined): string {
   return (value ?? "").toLowerCase().replace(/[^a-z0-9]+/g, "")
 }
 interface LeafEntry {
   id: string
   leafKey: string
   leafName: string
   evalsCount: number
 }
-function collectLeafEntries(fam: HierarchyFamily): LeafEntry[] {
   const out: LeafEntry[] = []
   for (const leaf of fam.leaves ?? []) {
     const ids = leaf.eval_summary_ids ?? []
     if (ids.length === 0) continue
     out.push({
       id: ids[0],
       leafKey: leaf.key,
       leafName: leaf.display_name || leaf.key,
       evalsCount: leaf.evals_count ?? ids.length,
     })
   }
   return out
 }
 /**
- * Pick the eval_summary_id that best matches a family's stated display_name.
  *
- * Backend hierarchy data sometimes has a family whose display_name names one
- * specific leaf (e.g. family `llm_stats`, display_name "HumanEval", with 471
- * leaves). The legacy "directIds[0]" pick navigates to whichever leaf was
- * processed first (often `aa_index`) — wrong. This helper:
  *
- * 1. If there is a leaf whose slug matches the family's display_name slug,
- *    prefer that leaf's id. (`HumanEval` → leaf `humaneval`.)
- * 2. Else if there is a direct family-level id whose slug equals the family
- *    key slug, prefer that (genuine family-level page).
- * 3. Otherwise fall back to the first available id.
  */
 function pickFamilyNavId(fam: HierarchyFamily, leafEntries: LeafEntry[]): string | null {
   const directIds = fam.eval_summary_ids ?? []
-  const all: Array<{ id: string; source: "direct" | "leaf"; leafKey?: string; leafName?: string }> = [
-    ...directIds.map((id) => ({ id, source: "direct" as const })),
-    ...leafEntries.map((l) => ({ id: l.id, source: "leaf" as const, leafKey: l.leafKey, leafName: l.leafName })),
-  ]
-  if (all.length === 0) return null
-  if (all.length === 1) return all[0].id
   const famNameSlug = slugify(fam.display_name)
   const famKeySlug = slugify(fam.key)
-  // 1. Leaf slug matches family display_name: e.g. display "HumanEval" → leaf "humaneval"
-  if (famNameSlug && famNameSlug !== famKeySlug) {
-    for (const entry of all) {
-      if (entry.source !== "leaf") continue
-      if (slugify(entry.leafKey) === famNameSlug || slugify(entry.leafName) === famNameSlug) {
-        return entry.id
-      }
     }
   }
-  // 2. Direct family-level id: id slug equals family key slug
-  for (const entry of all) {
-    if (entry.source !== "direct") continue
-    if (slugify(entry.id) === famKeySlug) return entry.id
   }
-  // 3. Direct id starting with the family key only (a true family-level summary)
-  for (const entry of all) {
-    if (entry.source !== "direct") continue
-    const idSlug = slugify(entry.id)
-    if (idSlug.startsWith(famKeySlug) && idSlug.length === famKeySlug.length) {
-      return entry.id
-    }
-  }
-  // 4. Fall back: leaves first, then direct
-  const leafFallback = all.find((e) => e.source === "leaf")
-  if (leafFallback) return leafFallback.id
-  return all[0].id
 }
 interface RowData {
@@ -114,23 +196,63 @@ interface RowData {
   name: string
   keySlug: string
   category: CategoryType
-  composites: number
   benchmarks: number
-  slices: number
-  metrics: number
   evalsCount: number
   leaves: LeafEntry[]
   /** True when the family has many leaves with no clean family-level summary —
    *  we open it expanded so the user picks a leaf directly. */
   isAggregator: boolean
 }
-export function FamilyTable({ families, totalModels }: FamilyTableProps) {
   const router = useRouter()
   const [expanded, setExpanded] = useState<Record<string, boolean>>({})
   const rows = useMemo<RowData[]>(() => {
-    return families.map((fam) => {
       const composites = fam.composites ?? []
       const standalone = fam.standalone_benchmarks ?? []
       const benchmarks = fam.benchmarks ?? []
@@ -141,12 +263,6 @@ export function FamilyTable({ families, totalModels }: FamilyTableProps) {
         ...benchmarks,
         ...composites.flatMap((c) => c.benchmarks ?? []),
       ]
-      const sliceCount =
-        fam.slices?.length ??
-        allBenchmarks.reduce(
-          (sum, b) => sum + ((b as { slices?: unknown[] }).slices?.length ?? 0),
-          0,
-        )
       const metricCount =
         (fam.metrics?.length ?? 0) +
         allBenchmarks.reduce(
@@ -156,51 +272,70 @@ export function FamilyTable({ families, totalModels }: FamilyTableProps) {
       const benchmarkCount =
         allBenchmarks.length > 0 ? allBenchmarks.length : leaves.length
-      const leafEntries = collectLeafEntries(fam)
       const navId = pickFamilyNavId(fam, leafEntries)
-      // An "aggregator" family is one whose display_name doesn't really
-      // describe a single benchmark (its leaves are heterogeneous). We
-      // detect this by counting leaves and, when there are many, prefer
-      // showing the leaf list rather than relying on the family-level id.
-      const isAggregator = leafEntries.length >= LEAVES_INLINE_MIN
-      return {
         key: fam.key,
         navId,
-        name: fam.display_name,
         keySlug: fam.key,
         category: (fam.category ?? "General") as CategoryType,
-        composites: composites.length,
         benchmarks: benchmarkCount,
-        slices: sliceCount,
-        metrics: metricCount,
         evalsCount: fam.evals_count ?? metricCount,
-        leaves: leafEntries,
         isAggregator,
-      }
-    })
-  }, [families])
   return (
     <div className="overflow-x-auto">
       <table className="ec-htable">
         <thead>
           <tr>
-            <th style={{ width: "30%" }}>Family</th>
             <th>Category</th>
-            <th className="num">Suites</th>
             <th className="num">Benchmarks</th>
-            <th className="num">Slices</th>
-            <th className="num">Metrics</th>
             <th className="num">Reported results</th>
             <th style={{ width: 90 }} />
           </tr>
         </thead>
         <tbody>
           {rows.map((row) => {
-            const dotClass = CATEGORY_DOT[row.category] ?? "bg-stone-400"
-            const isExpanded = expanded[row.key] ?? false
             const expandable = row.isAggregator
             const visibleLeaves = isExpanded
               ? row.leaves.slice(0, LEAVES_INLINE_MAX)
@@ -213,12 +348,16 @@ export function FamilyTable({ families, totalModels }: FamilyTableProps) {
               <Fragment key={row.key}>
                 <tr
                   onClick={(event) => {
-                    // Allow chevron click without navigating
                     const target = event.target as HTMLElement
                     if (target.closest("[data-row-toggle]")) return
-                    if (row.navId) router.push(`/evals/${encodeURIComponent(row.navId)}`)
                   }}
-                  style={{ cursor: row.navId ? "pointer" : "default" }}
                 >
                   <td>
                     <div className="flex items-start gap-2.5 min-w-0">
@@ -240,14 +379,18 @@ export function FamilyTable({ families, totalModels }: FamilyTableProps) {
                       ) : (
                         <span className="-ml-1 mt-0.5 inline-block h-4 w-4" aria-hidden />
                       )}
-                      <span
-                        className={`shrink-0 mt-1.5 h-2 w-2 rounded-full ${dotClass}`}
-                        aria-hidden
-                      />
                       <div className="min-w-0">
                         <div className="font-semibold text-[14px] text-[color:var(--fg)] truncate">
                           {row.name}
                         </div>
                         <div className="font-mono text-[10px] tracking-[0.06em] text-[color:var(--fg-subtle)] mt-0.5 truncate">
                           {row.keySlug}
                           {expandable && (
@@ -264,18 +407,9 @@ export function FamilyTable({ families, totalModels }: FamilyTableProps) {
                       {row.category}
                     </span>
                   </td>
-                  <td className="num font-mono text-[13px]">
-                    {row.composites > 0 ? row.composites.toLocaleString() : "—"}
-                  </td>
                   <td className="num font-mono text-[13px]">
                     {row.benchmarks.toLocaleString()}
                   </td>
-                  <td className="num font-mono text-[13px]">
-                    {row.slices > 0 ? row.slices.toLocaleString() : "—"}
-                  </td>
-                  <td className="num font-mono text-[13px]">
-                    {row.metrics > 0 ? row.metrics.toLocaleString() : "—"}
-                  </td>
                   <td className="num font-mono text-[13px]">
                     {row.evalsCount.toLocaleString()}
                     {totalModels > 0 && (
@@ -292,7 +426,7 @@ export function FamilyTable({ families, totalModels }: FamilyTableProps) {
                 {isExpanded && visibleLeaves.length > 0 && (
                   <tr style={{ background: "var(--bg-warm)" }}>
-                    <td colSpan={8} style={{ padding: 0 }}>
                       <div style={{ padding: "10px 24px 14px 64px" }}>
                         <div
                           className="font-mono uppercase mb-2"

 import { ArrowUpRight, ChevronDown, ChevronRight } from "lucide-react"
 import type { HierarchyFamily, HierarchyLeaf } from "@/lib/backend-artifacts"
+import type { BenchmarkCard, CategoryType } from "@/lib/benchmark-schema"
+import type { BenchmarkEvalListItem } from "@/lib/eval-processing"
+// No per-category chip colour map: categories use the neutral chip
+// styling, because colour-coded chips read as noise against the
+// editorial palette.
 const LEAVES_INLINE_MIN = 2
 const LEAVES_INLINE_MAX = 50
 interface FamilyTableProps {
   families: HierarchyFamily[]
   totalModels: number
+  evalItems?: Map<string, BenchmarkEvalListItem>
+  /** Optional benchmark-metadata index (keyed by benchmark / leaf / family
+   *  key). Used to look up per-leaf domains when the hierarchy doesn't
+   *  carry `leaf.tags.domains`, so the domain filter works on data that
+   *  only ships domains via the metadata file. */
+  benchmarkCards?: Record<string, BenchmarkCard>
+  /** Lower-cased domain slugs to filter the listing. When non-empty, every
+   *  expandable family is auto-expanded and its leaves are restricted to
+   *  those that touch one of the selected domains. Single-benchmark
+   *  families are kept only when their domains intersect the filter.
+   *  Pass `null`/`undefined` to disable filtering. */
+  domainFilter?: Set<string> | null
 }
 function slugify(value: string | null | undefined): string {
   return (value ?? "").toLowerCase().replace(/[^a-z0-9]+/g, "")
 }
+/** Render a family key as a human-readable title — used as a fallback when
+ *  the backend `display_name` is misleading (e.g. names a single leaf instead
+ *  of the family). Common acronyms stay uppercase; everything else is title
+ *  case. */
+const FAMILY_KEY_ACRONYMS = new Set([
+  "llm", "llms", "aa", "hf", "api", "cli", "sql", "gpt", "qa", "ai", "ml",
+  "nlp", "rl", "vqa", "vlm", "mt", "cv",
+])
+function humanizeFamilyKey(key: string): string {
+  return key
+    .split(/[_\-\s]+/)
+    .filter(Boolean)
+    .map((word) => {
+      if (FAMILY_KEY_ACRONYMS.has(word.toLowerCase())) return word.toUpperCase()
+      return word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()
+    })
+    .join("-")
+}
 interface LeafEntry {
   id: string
   leafKey: string
   leafName: string
   evalsCount: number
+  domains: string[]
 }
+/**
+ * Flatten a family's leaves into `LeafEntry` rows, skipping leaves that
+ * have no eval_summary_ids (there is nothing to navigate to).
+ *
+ * Each entry's `domains` are normalised with `trim().toLowerCase()` — the
+ * same normalisation the family-level filter check applies — so a domain
+ * carrying stray whitespace matches the filter identically at leaf and
+ * family level. Empty strings are dropped.
+ *
+ * @param fam            Family whose `leaves` are flattened.
+ * @param benchmarkCards Optional metadata index used as a fallback source
+ *                       of domains, looked up by leaf key and by each of
+ *                       the leaf's eval_summary_ids.
+ * @returns One entry per leaf that has at least one eval_summary_id.
+ */
+function collectLeafEntries(
+  fam: HierarchyFamily,
+  benchmarkCards?: Record<string, BenchmarkCard>,
+): LeafEntry[] {
   const out: LeafEntry[] = []
   for (const leaf of fam.leaves ?? []) {
     const ids = leaf.eval_summary_ids ?? []
     if (ids.length === 0) continue
+    // Domain sources, in order of trust:
+    //   (1) hierarchy `leaf.tags.domains` — sometimes absent
+    //   (2) benchmark-metadata keyed by leaf.key
+    //   (3) benchmark-metadata keyed by the leaf's eval_summary_id
+    const collected = new Set<string>()
+    const addDomains = (domains: readonly string[] | null | undefined) => {
+      for (const d of domains ?? []) {
+        const normalised = d.trim().toLowerCase()
+        if (normalised) collected.add(normalised)
+      }
+    }
+    addDomains(leaf.tags?.domains)
+    addDomains(benchmarkCards?.[leaf.key]?.benchmark_details?.domains)
+    for (const id of ids) {
+      addDomains(benchmarkCards?.[id]?.benchmark_details?.domains)
+    }
     out.push({
       id: ids[0],
       leafKey: leaf.key,
       leafName: leaf.display_name || leaf.key,
       evalsCount: leaf.evals_count ?? ids.length,
+      domains: Array.from(collected),
     })
   }
   return out
 }
 /**
+ * Pick the eval_summary_id to navigate to when the user clicks the family
+ * row. Returns null when the family has no genuine family-level summary —
+ * in that case the row click should expand the leaf list instead of
+ * opening one arbitrary child.
  *
+ * Some backend families flatten their leaf eval_summary_ids into the
+ * family's own `eval_summary_ids` array (e.g. family `llm_stats` whose
+ * direct ids are `llm_stats_aa_index`, `llm_stats_humaneval`, ... — each
+ * a leaf summary). Those are NOT family-level composites; treating them
+ * as such is what made clicking "LLM-Stats" land on AA Index.
  *
+ * We filter direct ids down to those that are NOT also leaf ids. Whatever
+ * remains is a real family-level summary. Then we apply slug-based
+ * priority among those.
  */
 function pickFamilyNavId(fam: HierarchyFamily, leafEntries: LeafEntry[]): string | null {
   const directIds = fam.eval_summary_ids ?? []
+  // Every eval_summary_id owned by any leaf. `LeafEntry.id` only carries the
+  // first id of each leaf, so seeding the set from leafEntries alone would
+  // let a leaf's second/third id — when flattened into the family's direct
+  // ids — pass for a family-level composite.
+  const leafIdSet = new Set<string>(leafEntries.map((l) => l.id))
+  for (const leaf of fam.leaves ?? []) {
+    for (const id of leaf.eval_summary_ids ?? []) leafIdSet.add(id)
+  }
+  // Real family-level summaries: direct ids that aren't actually leaf ids
+  // pulled up to the family. These resolve to is_aggregated/composite
+  // summaries on the detail page.
+  const compositeDirectIds = directIds.filter((id) => !leafIdSet.has(id))
+  if (compositeDirectIds.length === 0) {
+    // No genuine family-level composite. If there's exactly one leaf, the
+    // family is just that leaf in disguise — open it. Otherwise return
+    // null and let the caller expand the list.
+    if (leafEntries.length === 1) return leafEntries[0].id
+    return null
+  }
+  if (compositeDirectIds.length === 1) return compositeDirectIds[0]
   const famNameSlug = slugify(fam.display_name)
   const famKeySlug = slugify(fam.key)
+  // 1. Direct composite whose slug equals the family display_name slug
+  if (famNameSlug) {
+    for (const id of compositeDirectIds) {
+      if (slugify(id) === famNameSlug) return id
     }
   }
+  // 2. Direct composite whose slug equals the family key slug
+  for (const id of compositeDirectIds) {
+    if (slugify(id) === famKeySlug) return id
   }
+  // 3. First direct composite
+  return compositeDirectIds[0]
+}
+/** Returns a one-line description for the family — but only when the
+ *  description applies to the whole family. Specifically: we only use the
+ *  benchmark_card overview attached to the family's *own* navigation
+ *  target (a family-level/composite eval). We don't borrow descriptions
+ *  from individual leaves, because a leaf's description describes that
+ *  one benchmark, not the family as a whole. */
+function pickFamilyDescription(
+  navId: string | null,
+  leafEntries: LeafEntry[],
+  evalItems: Map<string, BenchmarkEvalListItem> | undefined,
+): string | null {
+  if (!evalItems || !navId) return null
+  // If navId resolved to a leaf (single-benchmark family), the leaf's
+  // description IS the family's description — that case is fine.
+  // If navId resolved to a composite, ditto. The only case we exclude is
+  // navId === null (no family-level summary), which the early return
+  // covers.
+  void leafEntries
+  const overview = evalItems.get(navId)?.benchmark_card?.benchmark_details?.overview
+  if (!overview) return null
+  return overview.length > 140 ? overview.slice(0, 137) + "…" : overview
+}
+/** Detects whether the family's `display_name` is misleading: backend data
+ *  sometimes labels a family after one of its leaves (e.g. family
+ *  `llm_stats` with display_name "HumanEval"). When that's the case the
+ *  row should be titled with the humanized key instead, so the user can
+ *  see they're looking at a *family* rather than a single benchmark. */
+function isFamilyDisplayNameMisleading(fam: HierarchyFamily, leafEntries: LeafEntry[]): boolean {
+  const nameSlug = slugify(fam.display_name)
+  if (!nameSlug) return false
+  if (nameSlug === slugify(fam.key)) return false
+  if (leafEntries.length < 2) return false
+  return leafEntries.some(
+    (l) => slugify(l.leafKey) === nameSlug || slugify(l.leafName) === nameSlug,
+  )
 }
 interface RowData {
   name: string
   keySlug: string
   category: CategoryType
   benchmarks: number
   evalsCount: number
   leaves: LeafEntry[]
   /** True when the family has many leaves with no clean family-level summary —
    *  we open it expanded so the user picks a leaf directly. */
   isAggregator: boolean
+  description: string | null
 }
+export function FamilyTable({
+  families,
+  totalModels,
+  evalItems,
+  benchmarkCards,
+  domainFilter,
+}: FamilyTableProps) {
   const router = useRouter()
   const [expanded, setExpanded] = useState<Record<string, boolean>>({})
+  const filterActive = Boolean(domainFilter && domainFilter.size > 0)
+  function leafMatchesFilter(leaf: LeafEntry): boolean {
+    if (!filterActive || !domainFilter) return true
+    return leaf.domains.some((d) => domainFilter.has(d))
+  }
+  function familyMatchesFilter(
+    fam: HierarchyFamily,
+    navId: string | null,
+    leafEntries: LeafEntry[],
+  ): boolean {
+    if (!filterActive || !domainFilter) return true
+    if (leafEntries.some(leafMatchesFilter)) return true
+    const candidates: BenchmarkCard | undefined = (() => {
+      if (navId) {
+        const fromList = evalItems?.get(navId)?.benchmark_card
+        if (fromList) return fromList
+      }
+      return undefined
+    })()
+    const sources: Array<string[]> = []
+    if (candidates) sources.push(candidates.benchmark_details?.domains ?? [])
+    sources.push(benchmarkCards?.[fam.key]?.benchmark_details?.domains ?? [])
+    for (const id of fam.eval_summary_ids ?? []) {
+      sources.push(benchmarkCards?.[id]?.benchmark_details?.domains ?? [])
+    }
+    for (const list of sources) {
+      for (const d of list) {
+        if (domainFilter.has(d.trim().toLowerCase())) return true
+      }
+    }
+    return false
+  }
   const rows = useMemo<RowData[]>(() => {
+    const out: RowData[] = []
+    for (const fam of families) {
       const composites = fam.composites ?? []
       const standalone = fam.standalone_benchmarks ?? []
       const benchmarks = fam.benchmarks ?? []
         ...benchmarks,
         ...composites.flatMap((c) => c.benchmarks ?? []),
       ]
       const metricCount =
         (fam.metrics?.length ?? 0) +
         allBenchmarks.reduce(
       const benchmarkCount =
         allBenchmarks.length > 0 ? allBenchmarks.length : leaves.length
+      const leafEntries = collectLeafEntries(fam, benchmarkCards)
       const navId = pickFamilyNavId(fam, leafEntries)
+      // An "aggregator" family has heterogeneous leaves; we expand it
+      // inline so the user can pick a benchmark directly. When the family
+      // has no real composite summary (navId === null) it's necessarily
+      // an aggregator — clicking the row toggles expand instead of
+      // navigating.
+      const isAggregator = leafEntries.length >= LEAVES_INLINE_MIN || navId == null
+      const displayName = isFamilyDisplayNameMisleading(fam, leafEntries)
+        ? humanizeFamilyKey(fam.key)
+        : fam.display_name
+      // Description sourcing: use the overview of the eval item the row
+      // navigates to (a family-level composite, or the sole leaf of a
+      // single-benchmark family). When there's no navId, or its eval item
+      // carries no overview, the row shows no description — we deliberately
+      // don't borrow one from an arbitrary leaf, since a leaf's overview
+      // describes that one benchmark, not the family as a whole.
+      const description = pickFamilyDescription(navId, leafEntries, evalItems)
+      if (!familyMatchesFilter(fam, navId, leafEntries)) continue
+      const visibleLeafEntries = filterActive
+        ? leafEntries.filter(leafMatchesFilter)
+        : leafEntries
+      out.push({
         key: fam.key,
         navId,
+        name: displayName,
         keySlug: fam.key,
         category: (fam.category ?? "General") as CategoryType,
         benchmarks: benchmarkCount,
         evalsCount: fam.evals_count ?? metricCount,
+        leaves: visibleLeafEntries,
         isAggregator,
+        description,
+      })
+    }
+    return out
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [families, evalItems, benchmarkCards, domainFilter])
   return (
     <div className="overflow-x-auto">
       <table className="ec-htable">
         <thead>
           <tr>
+            <th style={{ width: "60%" }}>Family</th>
             <th>Category</th>
             <th className="num">Benchmarks</th>
             <th className="num">Reported results</th>
             <th style={{ width: 90 }} />
           </tr>
         </thead>
         <tbody>
           {rows.map((row) => {
+            // When a domain filter is active we auto-expand every aggregator
+            // so the matching leaves are immediately visible, but still let
+            // the user collapse a row manually via the chevron.
+            const isExpanded = filterActive
+              ? expanded[row.key] ?? true
+              : expanded[row.key] ?? false
             const expandable = row.isAggregator
             const visibleLeaves = isExpanded
               ? row.leaves.slice(0, LEAVES_INLINE_MAX)
               <Fragment key={row.key}>
                 <tr
                   onClick={(event) => {
+                    // Allow chevron click without double-handling
                     const target = event.target as HTMLElement
                     if (target.closest("[data-row-toggle]")) return
+                    if (row.navId) {
+                      router.push(`/evals/${encodeURIComponent(row.navId)}`)
+                    } else if (expandable) {
+                      setExpanded((current) => ({ ...current, [row.key]: !isExpanded }))
+                    }
                   }}
+                  style={{ cursor: row.navId || expandable ? "pointer" : "default" }}
                 >
                   <td>
                     <div className="flex items-start gap-2.5 min-w-0">
                       ) : (
                         <span className="-ml-1 mt-0.5 inline-block h-4 w-4" aria-hidden />
                       )}
                       <div className="min-w-0">
                         <div className="font-semibold text-[14px] text-[color:var(--fg)] truncate">
                           {row.name}
                         </div>
+                        {row.description && (
+                          <div
+                            className="mt-0.5"
+                            style={{ fontSize: 12, color: "var(--fg-muted)", lineHeight: 1.45, display: "-webkit-box", WebkitLineClamp: 2, WebkitBoxOrient: "vertical", overflow: "hidden" }}
+                          >
+                            {row.description}
+                          </div>
+                        )}
                         <div className="font-mono text-[10px] tracking-[0.06em] text-[color:var(--fg-subtle)] mt-0.5 truncate">
                           {row.keySlug}
                           {expandable && (
                       {row.category}
                     </span>
                   </td>
                   <td className="num font-mono text-[13px]">
                     {row.benchmarks.toLocaleString()}
                   </td>
                   <td className="num font-mono text-[13px]">
                     {row.evalsCount.toLocaleString()}
                     {totalModels > 0 && (
                 {isExpanded && visibleLeaves.length > 0 && (
                   <tr style={{ background: "var(--bg-warm)" }}>
+                    <td colSpan={5} style={{ padding: 0 }}>
                       <div style={{ padding: "10px 24px 14px 64px" }}>
                         <div
                           className="font-mono uppercase mb-2"

components/param-range-picker.tsx ADDED Viewed

	@@ -0,0 +1,244 @@

+"use client"
+import { useId } from "react"
+import {
+  PARAM_RANGE_MARKERS,
+  PARAM_RANGE_MAX_INDEX,
+  PARAM_RANGE_VALUES,
+  formatParamBoundLabel,
+} from "@/lib/param-range"
+export type ParamRangeVariant = "default" | "inline" | "promo"
+interface ParamRangePickerProps {
+  /** Index into PARAM_RANGE_VALUES for the lower handle (0 = "< 1B"). */
+  minStep: number
+  /** Index into PARAM_RANGE_VALUES for the upper handle (max = "> 500B"). */
+  maxStep: number
+  onMinChange: (next: number) => void
+  onMaxChange: (next: number) => void
+  /**
+   * `default` — Variant A: bracketed range with a labelled rail and a boxed
+   * mono readout, suitable for use as the headline call-out at the top of a
+   * leaderboard.
+   *
+   * `inline` — Variant B: a single-line picker with no boxed readout, sized
+   * to drop into a hairline toolbar alongside Sort and Filter pickers.
+   *
+   * `promo` — Variant C: warm-background framed slider with a left accent
+   * rule. Use when the slider actively reframes a chart/matrix below it.
+   */
+  variant?: ParamRangeVariant
+  /** Headline shown to the left of the slider (default & promo variants). */
+  headline?: string
+  /** Sub-text shown under the headline (default & promo variants). */
+  subline?: string
+  /** Callback to reset both handles to the open range. When provided, a
+   *  Reset affordance is rendered next to the readout while the slider
+   *  is constrained. */
+  onReset?: () => void
+  /** Pressed state of the small "Unknown size" toggle pill shown next to
+   *  the readout. The pill is rendered only when `onShowUnknownSizeChange`
+   *  is provided. The toggle is independent of the slider — when off,
+   *  models with no detected size are filtered out regardless of where
+   *  the handles are. */
+  showUnknownSize?: boolean
+  onShowUnknownSizeChange?: (next: boolean) => void
+  className?: string
+}
+/**
+ * Themed dual-handle parameter-range picker. Shape and colour come from the
+ * design system: hairline rail, square outline thumbs, mono uppercase tick
+ * labels above the rail, and a boxed mono readout for the explicit bounds.
+ *
+ * The two `<input type="range">` elements provide native dragging + arrow-key
+ * a11y. The visual rail/fill/ticks/thumbs are absolutely-positioned overlays;
+ * the inputs themselves are kept transparent except for their thumbs (see
+ * `.param-range-input` in globals.css).
+ */
+export function ParamRangePicker({
+  minStep,
+  maxStep,
+  onMinChange,
+  onMaxChange,
+  variant = "default",
+  headline = "Parameter range",
+  subline = "Narrow the matrix to comparable model sizes.",
+  onReset,
+  showUnknownSize,
+  onShowUnknownSizeChange,
+  className,
+}: ParamRangePickerProps) {
+  const minId = useId()
+  const maxId = useId()
+  const isInline = variant === "inline"
+  const isPromo = variant === "promo"
+  const minPercent = (minStep / PARAM_RANGE_MAX_INDEX) * 100
+  const maxPercent = (maxStep / PARAM_RANGE_MAX_INDEX) * 100
+  const isConstrained = minStep > 0 || maxStep < PARAM_RANGE_MAX_INDEX
+  const track = (
+    <div className="pr-track-wrap">
+      <div className="pr-ticks" aria-hidden>
+        {PARAM_RANGE_MARKERS.map((marker, idx) => {
+          const isFirst = idx === 0
+          const isLast = idx === PARAM_RANGE_MARKERS.length - 1
+          const active = marker.step === minStep || marker.step === maxStep
+          return (
+            <div
+              key={marker.label}
+              className={`pr-tick${active ? " on" : ""}`}
+              style={{
+                left: `${(marker.step / PARAM_RANGE_MAX_INDEX) * 100}%`,
+              }}
+            >
+              <span
+                style={{
+                  transform: isFirst
+                    ? "translateX(0)"
+                    : isLast
+                    ? "translateX(-100%)"
+                    : "translateX(-50%)",
+                  marginLeft: 0,
+                }}
+              >
+                {marker.label}
+              </span>
+            </div>
+          )
+        })}
+      </div>
+      <div className="pr-rail" />
+      <div
+        className="pr-fill"
+        style={{
+          left: `${minPercent}%`,
+          width: `${Math.max(maxPercent - minPercent, 0)}%`,
+        }}
+      />
+      {/* Decorative (aria-hidden) micro-ticks, one per step, to give the rail a metered feel */}
+      <div className="pr-microticks" aria-hidden>
+        {PARAM_RANGE_VALUES.map((_, stepIndex) => (
+          <span
+            key={`pr-micro-${stepIndex}`}
+            style={{ left: `${(stepIndex / PARAM_RANGE_MAX_INDEX) * 100}%` }}
+          />
+        ))}
+      </div>
+      {/* Native inputs provide a11y + drag; we hide them visually and rely
+          on the .param-range-input thumb styling for the visible handles. */}
+      <input
+        id={minId}
+        type="range"
+        min={0}
+        max={PARAM_RANGE_MAX_INDEX}
+        step={1}
+        value={minStep}
+        onChange={(event) => {
+          const next = Number(event.target.value)
+          onMinChange(Math.min(next, maxStep))
+        }}
+        className="param-range-input"
+        aria-label={`Minimum ${headline.toLowerCase()}`}
+      />
+      <input
+        id={maxId}
+        type="range"
+        min={0}
+        max={PARAM_RANGE_MAX_INDEX}
+        step={1}
+        value={maxStep}
+        onChange={(event) => {
+          const next = Number(event.target.value)
+          onMaxChange(Math.max(next, minStep))
+        }}
+        className="param-range-input"
+        aria-label={`Maximum ${headline.toLowerCase()}`}
+      />
+    </div>
+  )
+  const resetBtn = onReset && isConstrained ? (
+    <button
+      type="button"
+      onClick={onReset}
+      className="pr-reset"
+      aria-label="Reset parameter range"
+    >
+      Reset
+    </button>
+  ) : null
+  const unknownToggle =
+    onShowUnknownSizeChange != null ? (
+      <button
+        type="button"
+        onClick={() => onShowUnknownSizeChange(!showUnknownSize)}
+        className={`pr-unknown-toggle${showUnknownSize ? " on" : ""}`}
+        aria-pressed={Boolean(showUnknownSize)}
+        title="Models without a reported parameter count"
+      >
+        <span className="pr-unknown-toggle-box" aria-hidden>
+          {showUnknownSize ? "✓" : ""}
+        </span>
+        Unknown size
+      </button>
+    ) : null
+  const readout = (
+    <div className="pr-readout-cell">
+      <div className={`pr-readout${isInline ? " inline" : ""}`}>
+        <span>{formatParamBoundLabel(minStep, "min")}</span>
+        <span className="arrow">{isInline ? "–" : "→"}</span>
+        <span>{formatParamBoundLabel(maxStep, "max")}</span>
+      </div>
+      {unknownToggle}
+      {resetBtn}
+    </div>
+  )
+  if (isInline) {
+    return (
+      <div className={`pr-slider inline${className ? ` ${className}` : ""}`}>
+        <span className="pr-label inline-label">
+          <strong>{headline}</strong>
+        </span>
+        {track}
+        {readout}
+      </div>
+    )
+  }
+  if (isPromo) {
+    return (
+      <div className={`pr-promo${className ? ` ${className}` : ""}`}>
+        <div className="pr-promo-head">
+          <span className="kicker">{headline}</span>
+          <p>{subline}</p>
+        </div>
+        <div className="pr-slider pr-slider-track-only">
+          {track}
+          {readout}
+        </div>
+      </div>
+    )
+  }
+  // Default (Variant A)
+  return (
+    <div className={`pr-slider${className ? ` ${className}` : ""}`}>
+      <div className="pr-label">
+        <strong>{headline}</strong>
+        {subline}
+      </div>
+      {track}
+      {readout}
+    </div>
+  )
+}

components/signals/benchmark-signals-strip.tsx ADDED Viewed

	@@ -0,0 +1,626 @@

+"use client"
+import type { BenchmarkEvalSummary } from "@/lib/eval-processing"
+import type { ModelResultForBenchmark } from "@/lib/eval-processing"
+type SignalId = "reproducibility" | "completeness" | "provenance" | "comparability" // the four signals; key type of the three lookup tables below
+const SIGNAL_GLYPHS: Record<SignalId, string> = { // one-letter glyph rendered inside each signal's badge
+  reproducibility: "R",
+  completeness: "C",
+  provenance: "P",
+  comparability: "X",
+}
+const SIGNAL_NAMES: Record<SignalId, string> = { // display name shown next to the glyph
+  reproducibility: "Reproducibility",
+  completeness: "Completeness",
+  provenance: "Provenance",
+  comparability: "Comparability",
+}
+const SIGNAL_ASKS: Record<SignalId, string> = { // the question each signal answers; SignalRow exposes it through the `title` attribute
+  reproducibility: "Could someone re-run this benchmark with what's documented?",
+  completeness: "How much of the benchmark card is filled in?",
+  provenance: "Who reported these scores and how many parties have replicated?",
+  comparability: "Where multiple reports exist, do they agree?",
+}
+/**
+ * Reproducibility — paper §4.2.1, signal spec §3.
+ *
+ * The spec lists `temperature, top_p, max_tokens, prompt_template` as the
+ * base required fields. In the live EEE corpus only `temperature` and
+ * `max_tokens` are reliably populated, so we restrict the check to those
+ * two for now (per maintainer guidance). Agentic benchmarks additionally
+ * require `eval_plan` and `eval_limits` — the spec's classification rule
+ * is followed verbatim.
+ */
+const BASE_REQUIRED_FIELDS = ["temperature", "max_tokens"] as const // required for every benchmark
+const AGENTIC_REQUIRED_FIELDS = ["eval_plan", "eval_limits"] as const // appended when isAgenticBenchmark() returns true
+const FIELD_LABELS: Record<string, string> = { // human-readable labels for setup fields; readers fall back to the raw key when a field is not listed
+  temperature: "temperature",
+  top_p: "top-p",
+  max_tokens: "max tokens",
+  prompt_template: "prompt template",
+  eval_plan: "eval plan",
+  eval_limits: "eval limits",
+}
+/** Setup fields compared to detect variant divergence (spec §6.1.2). */
+const COMPARABILITY_COMPARE_FIELDS = [ // each value is compared by its JSON.stringify form, with a missing field treated as null
+  "temperature",
+  "top_p",
+  "top_k",
+  "max_tokens",
+  "prompt_template",
+  "reasoning",
+] as const
+/**
+ * Renders the four interpretive signals (paper §4.2.1, spec v1.0 §§3-6)
+ * for one benchmark. The benchmark-level counterpart of
+ * `CorpusSignalsStrip`: everything is derived from a single
+ * `BenchmarkEvalSummary`.
+ *
+ * Every row shows one headline statistic phrased so that a higher value
+ * means better documentation, which keeps the four rows visually
+ * comparable.
+ *
+ * @param summary The benchmark summary the four signals are derived from.
+ */
+export function BenchmarkSignalsStrip({ summary }: { summary: BenchmarkEvalSummary }) {
+  // Derivation order matches the display order of the rows.
+  const rows: Array<[SignalId, DerivedSignal]> = [
+    ["reproducibility", deriveReproducibility(summary)],
+    ["completeness", deriveCompleteness(summary)],
+    ["provenance", deriveProvenance(summary)],
+    ["comparability", deriveComparability(summary)],
+  ]
+  return (
+    <div
+      className="grid gap-x-6 gap-y-3"
+      style={{
+        gridTemplateColumns: "repeat(auto-fit, minmax(220px, 1fr))",
+        border: "1px solid var(--border-soft)",
+        background: "var(--bg)",
+        padding: "12px 16px",
+      }}
+    >
+      {rows.map(([signalId, derived]) => (
+        <SignalRow key={signalId} id={signalId} {...derived} />
+      ))}
+    </div>
+  )
+}
+interface DerivedSignal { // what every derive* function returns and SignalRow renders
+  statValue: string // headline number as text, or "—" when the signal cannot be computed
+  statUnit: string // unit rendered after the value ("%"), or "" when there is no value
+  headline: string // one-sentence summary of the score
+  detail: string // secondary breakdown appended after the headline; "" hides it
+}
+// ──────────────────────────────────────────────────────────────────────────
+// Reproducibility (spec §3)
+// ──────────────────────────────────────────────────────────────────────────
+/**
+ * Decides whether a benchmark counts as agentic.
+ *
+ * A benchmark is agentic when its card lists one of the task tags
+ * `agentic`, `tool_use` or `multi_step_agent` (compared case-insensitively),
+ * or when any model result carries a non-null `agentic_eval_config` in its
+ * generation args.
+ *
+ * @param summary Benchmark summary to classify.
+ * @returns `true` when either rule matches, otherwise `false`.
+ */
+function isAgenticBenchmark(summary: BenchmarkEvalSummary): boolean {
+  const agenticTags = ["agentic", "tool_use", "multi_step_agent"]
+  const declaredTasks = summary.benchmark_card?.purpose_and_intended_users?.tasks
+  if (Array.isArray(declaredTasks)) {
+    const normalised = declaredTasks.map((task) => String(task).toLowerCase())
+    if (agenticTags.some((tag) => normalised.includes(tag))) return true
+  }
+  // Fall back to the per-result generation config.
+  return (summary.model_results ?? []).some((entry) => {
+    const generationArgs = getGenerationArgs(entry)
+    return generationArgs !== null && generationArgs.agentic_eval_config != null
+  })
+}
+/**
+ * Reads `result.generation_config.generation_args` from a model result.
+ *
+ * @param result One model result entry.
+ * @returns The generation args object, or `null` when the generation config
+ *   or the args are missing or the args are not an object.
+ */
+function getGenerationArgs(result: ModelResultForBenchmark): Record<string, unknown> | null {
+  type WithGenerationConfig = { generation_config?: { generation_args?: Record<string, unknown> } }
+  const payload = result.result as WithGenerationConfig | undefined
+  const generationArgs = payload?.generation_config?.generation_args
+  if (!generationArgs || typeof generationArgs !== "object") return null
+  return generationArgs
+}
+/**
+ * Reproducibility signal: the share of reported scores whose generation
+ * args contain every required setup field.
+ *
+ * The required fields are `BASE_REQUIRED_FIELDS`, plus
+ * `AGENTIC_REQUIRED_FIELDS` when the benchmark is classified as agentic.
+ *
+ * @param summary Benchmark summary whose `model_results` are inspected.
+ * @returns The percentage of fully documented results, a headline, and a
+ *   detail line naming up to two of the most frequently missing fields
+ *   (or listing the required fields when nothing is missing). With no
+ *   results, a "—" placeholder is returned instead.
+ */
+function deriveReproducibility(summary: BenchmarkEvalSummary): DerivedSignal {
+  const results = summary.model_results ?? []
+  const requiredFields: string[] = [...BASE_REQUIRED_FIELDS]
+  if (isAgenticBenchmark(summary)) requiredFields.push(...AGENTIC_REQUIRED_FIELDS)
+  if (results.length === 0) {
+    return {
+      statValue: "—",
+      statUnit: "",
+      headline: "Reproducibility doesn't apply — no reported scores.",
+      detail: "",
+    }
+  }
+  // Per-field tally of results missing that field; insertion order follows requiredFields.
+  const missingByField = new Map<string, number>()
+  for (const field of requiredFields) missingByField.set(field, 0)
+  let completeCount = 0
+  for (const entry of results) {
+    const generationArgs = getGenerationArgs(entry) ?? {}
+    const absent = requiredFields.filter((field) => !isPopulated(generationArgs[field]))
+    for (const field of absent) {
+      missingByField.set(field, (missingByField.get(field) ?? 0) + 1)
+    }
+    if (absent.length === 0) completeCount += 1
+  }
+  const total = results.length
+  const score = completeCount / total
+  // The two fields missing most often, each annotated with its miss rate.
+  const worstGaps = Array.from(missingByField.entries())
+    .filter(([, misses]) => misses > 0)
+    .sort(([, left], [, right]) => right - left)
+    .slice(0, 2)
+    .map(([field, misses]) => `${FIELD_LABELS[field] ?? field} (${formatPct(misses / total)})`)
+    .join(", ")
+  let headline: string
+  if (score === 1) {
+    headline = "Every reported score has a complete generation config."
+  } else if (score === 0) {
+    headline = "No reported score has all required setup fields."
+  } else {
+    headline = `${completeCount} of ${total} triples document the full setup.`
+  }
+  let detail: string
+  if (worstGaps) {
+    detail = `Most often missing: ${worstGaps}.`
+  } else {
+    detail = `Required: ${requiredFields.map((field) => FIELD_LABELS[field] ?? field).join(", ")}.`
+  }
+  return { statValue: pctNum(score), statUnit: "%", headline, detail }
+}
+// ──────────────────────────────────────────────────────────────────────────
+// Completeness (spec §4)
+// ──────────────────────────────────────────────────────────────────────────
+interface CompletenessField { // one slot in the completeness denominator
+  path: string // dot-separated path into the benchmark card, resolved with readCardPath()
+  label: string // short name used when listing gaps in the detail line
+  coverage: "full" | "partial" | "reserved" // full: 1 or 0 for the value at `path`; partial: fraction of `subitems` populated; reserved: currently always scores 0
+  /** For partial: names of the keys under `path` whose presence is checked. */
+  subitems?: readonly string[]
+}
+const COMPLETENESS_FIELD_SET: readonly CompletenessField[] = [ // every entry carries equal weight in the completeness average
+  { path: "benchmark_details.overview", label: "overview", coverage: "full" },
+  { path: "benchmark_details.data_type", label: "data type", coverage: "full" },
+  {
+    path: "benchmark_details",
+    label: "domains / languages / resources",
+    coverage: "partial",
+    subitems: ["domains", "languages", "resources"],
+  },
+  {
+    path: "purpose_and_intended_users",
+    label: "purpose",
+    coverage: "partial",
+    subitems: ["goal", "audience", "tasks", "limitations"],
+  },
+  {
+    path: "data",
+    label: "data",
+    coverage: "partial",
+    subitems: ["source", "size", "format", "annotation"],
+  },
+  {
+    path: "methodology",
+    label: "methodology",
+    coverage: "partial",
+    subitems: ["methods", "metrics", "calculation", "interpretation", "baseline_results", "validation"],
+  },
+  {
+    path: "ethical_and_legal_considerations",
+    label: "ethical & legal",
+    coverage: "partial",
+    subitems: ["privacy_and_anonymity", "data_licensing", "consent_procedures", "compliance_with_regulations"],
+  },
+  // Reserved — still counted in the denominator even when unset (spec §4.2).
+  { path: "evalcards.lifecycle_status", label: "lifecycle status", coverage: "reserved" },
+] as const
+/**
+ * Completeness signal: the mean per-field score of the benchmark card over
+ * `COMPLETENESS_FIELD_SET`.
+ *
+ * Field scoring:
+ * - `full`: 1 when the value at the path is populated, otherwise 0.
+ * - `partial`: fraction of the listed sub-items that are populated.
+ * - `reserved`: always 0 for now, because the eval-summary payload does not
+ *   carry an `evalcards.lifecycle_status` section; the slot still counts
+ *   in the denominator per spec.
+ *
+ * @param summary Benchmark summary whose `benchmark_card` is scored.
+ * @returns The mean score as a percentage, a qualitative headline, and a
+ *   detail line with full / partial / missing counts plus up to two
+ *   non-reserved fields that scored 0.
+ */
+function deriveCompleteness(summary: BenchmarkEvalSummary): DerivedSignal {
+  const card = summary.benchmark_card
+  const scored = COMPLETENESS_FIELD_SET.map((field) => {
+    // Reserved fields, and anything unreadable, keep the default of 0.
+    let score = 0
+    if (field.coverage === "full") {
+      if (card && isPopulated(readCardPath(card, field.path))) score = 1
+    } else if (field.coverage === "partial") {
+      const section = card
+        ? (readCardPath(card, field.path) as Record<string, unknown> | undefined)
+        : undefined
+      const keys = field.subitems ?? []
+      if (section && keys.length > 0) {
+        const present = keys.filter((key) => isPopulated(section[key])).length
+        score = present / keys.length
+      }
+    }
+    return { path: field.path, label: field.label, coverage: field.coverage, score }
+  })
+  const total = scored.length
+  let sumScore = 0
+  let populatedCount = 0
+  let partialCount = 0
+  let missingCount = 0
+  for (const { score } of scored) {
+    sumScore += score
+    if (score === 1) populatedCount += 1
+    else if (score === 0) missingCount += 1
+    else if (score > 0 && score < 1) partialCount += 1
+  }
+  const completeness = total > 0 ? sumScore / total : null
+  const topMissing = scored
+    .filter((entry) => entry.score === 0 && entry.coverage !== "reserved")
+    .slice(0, 2)
+    .map((entry) => entry.label)
+    .join(", ")
+  let headline: string
+  let detail: string
+  if (!card) {
+    headline = "No benchmark card has been authored yet."
+    detail = "Reading context will lean on whatever the leaderboard JSON provides."
+  } else {
+    if (completeness === 1) {
+      headline = "Every documented field is populated."
+    } else if (completeness != null && completeness >= 0.6) {
+      headline = "Most documented fields are populated."
+    } else {
+      headline = "Several documented fields are still empty."
+    }
+    const gaps = topMissing ? ` · gaps: ${topMissing}` : ""
+    detail = `${populatedCount} full · ${partialCount} partial · ${missingCount} missing of ${total}${gaps}`
+  }
+  return { statValue: pctNum(completeness), statUnit: "%", headline, detail }
+}
+/**
+ * Resolves a dot-separated path against a nested object.
+ *
+ * @param card Root value; anything other than a non-null object yields `undefined`.
+ * @param path Dot-separated property names, e.g. `"benchmark_details.overview"`.
+ * @returns The value found at the path, or `undefined` as soon as a step
+ *   would have to descend into a null or non-object value.
+ */
+function readCardPath(card: unknown, path: string): unknown {
+  if (!card || typeof card !== "object") return undefined
+  return path.split(".").reduce<unknown>((node, segment) => {
+    // Once the walk falls off the object graph, every later step stays undefined.
+    if (node == null || typeof node !== "object") return undefined
+    return (node as Record<string, unknown>)[segment]
+  }, card)
+}
+// ──────────────────────────────────────────────────────────────────────────
+// Provenance (spec §5)
+// ──────────────────────────────────────────────────────────────────────────
+/** Attribution buckets used by the provenance signal. */
+type ProvenanceSourceType = "first_party" | "third_party" | "collaborative" | "unspecified"
+/**
+ * Classifies a result by `source_metadata.evaluator_relationship`.
+ *
+ * @param result One model result entry.
+ * @returns The relationship when it is one of the three known values,
+ *   otherwise `"unspecified"`.
+ */
+function readSourceType(result: ModelResultForBenchmark): ProvenanceSourceType {
+  const relationship = (result.source_metadata as { evaluator_relationship?: string } | undefined)
+    ?.evaluator_relationship
+  switch (relationship) {
+    case "first_party":
+    case "third_party":
+    case "collaborative":
+      return relationship
+    default:
+      return "unspecified"
+  }
+}
+/**
+ * Reads the reporting organisation from `source_metadata.source_organization_name`.
+ *
+ * @param result One model result entry.
+ * @returns The trimmed organisation name, or `null` when it is missing,
+ *   not a string, or blank.
+ */
+function readSourceOrg(result: ModelResultForBenchmark): string | null {
+  const rawName = (result.source_metadata as { source_organization_name?: string } | undefined)
+    ?.source_organization_name
+  if (typeof rawName !== "string") return null
+  const trimmed = rawName.trim()
+  return trimmed.length > 0 ? trimmed : null
+}
+/**
+ * Picks the metric half of a (model, metric) grouping key.
+ *
+ * @param result One model result entry.
+ * @returns The first non-nullish value of `metric_summary_id`, `metric_key`
+ *   and `evaluation_name` on `result.result`, or `""` when none is set.
+ */
+function metricKeyForResult(result: ModelResultForBenchmark): string {
+  const payload = result.result as
+    | { metric_summary_id?: string; metric_key?: string; evaluation_name?: string }
+    | undefined
+  if (payload?.metric_summary_id != null) return payload.metric_summary_id
+  if (payload?.metric_key != null) return payload.metric_key
+  return payload?.evaluation_name ?? ""
+}
+/**
+ * Picks the model half of a (model, metric) grouping key.
+ *
+ * @param result One model result entry.
+ * @returns `model_info.id`, falling back to `model_info.name`, then `""`.
+ */
+function modelKeyForResult(result: ModelResultForBenchmark): string {
+  const info = result.model_info
+  return info?.id ?? info?.name ?? ""
+}
+/**
+ * Provenance signal: the share of reported scores that carry a known
+ * evaluator relationship.
+ *
+ * Also tracks which organisations reported each (model, metric) group, so
+ * the headline can say how many groups have reports from more than one
+ * party. Results without an organisation name do not contribute to any
+ * group.
+ *
+ * @param summary Benchmark summary whose `model_results` are inspected.
+ * @returns The attributed share as a percentage, a headline, and a detail
+ *   line with the source-type distribution and, when at least one group
+ *   has a named organisation, the multi-source rate. With no results, a
+ *   "—" placeholder is returned instead.
+ */
+function deriveProvenance(summary: BenchmarkEvalSummary): DerivedSignal {
+  const results = summary.model_results ?? []
+  const total = results.length
+  if (total === 0) {
+    return {
+      statValue: "—",
+      statUnit: "",
+      headline: "No reported scores yet.",
+      detail: "",
+    }
+  }
+  const tally: Record<ProvenanceSourceType, number> = {
+    first_party: 0,
+    third_party: 0,
+    collaborative: 0,
+    unspecified: 0,
+  }
+  const allOrgs = new Set<string>()
+  const orgsPerGroup = new Map<string, Set<string>>()
+  for (const entry of results) {
+    tally[readSourceType(entry)] += 1
+    const org = readSourceOrg(entry)
+    if (!org) continue
+    allOrgs.add(org)
+    const groupKey = `${modelKeyForResult(entry)}::${metricKeyForResult(entry)}`
+    const members = orgsPerGroup.get(groupKey)
+    if (members) {
+      members.add(org)
+    } else {
+      orgsPerGroup.set(groupKey, new Set([org]))
+    }
+  }
+  const score = (total - tally.unspecified) / total
+  const eligibleGroups = orgsPerGroup.size
+  let multiSourceGroups = 0
+  for (const members of orgsPerGroup.values()) {
+    if (members.size > 1) multiSourceGroups += 1
+  }
+  const multiRate = eligibleGroups > 0 ? multiSourceGroups / eligibleGroups : null
+  let headline: string
+  if (tally.unspecified === total) {
+    headline = "No triple carries an attribution."
+  } else if (multiSourceGroups > 0) {
+    headline = `${multiSourceGroups} of ${eligibleGroups} (model, metric) groups have reports from more than one party.`
+  } else {
+    headline = `Single-source benchmark: ${allOrgs.size} reporting org${allOrgs.size === 1 ? "" : "s"}.`
+  }
+  // Distribution line: one entry per source type that actually occurs, in fixed order.
+  const labelled: Array<[ProvenanceSourceType, string]> = [
+    ["first_party", "first-party"],
+    ["third_party", "third-party"],
+    ["collaborative", "collaborative"],
+    ["unspecified", "unspecified"],
+  ]
+  const distribution = labelled
+    .filter(([sourceType]) => tally[sourceType] > 0)
+    .map(([sourceType, label]) => `${formatPct(tally[sourceType] / total)} ${label}`)
+  const detailParts = [distribution.join(" · ")]
+  if (multiRate != null) detailParts.push(`${formatPct(multiRate)} multi-source`)
+  return { statValue: pctNum(score), statUnit: "%", headline, detail: detailParts.join(" · ") }
+}
+// ──────────────────────────────────────────────────────────────────────────
+// Comparability (spec §6)
+// ──────────────────────────────────────────────────────────────────────────
+/**
+ * Chooses the score difference above which two reports count as divergent.
+ *
+ * - No metric config, a `proportion` unit, or a `continuous_normalized`
+ *   score type: 0.05.
+ * - A `percent` unit: 5.0.
+ * - Otherwise 5% of the `max_score - min_score` range when both bounds are
+ *   numbers and the range is positive, else 0.05.
+ *
+ * @param metricConfig The benchmark's metric configuration, if any.
+ * @returns The divergence threshold in the metric's own units.
+ */
+function computeThreshold(metricConfig: BenchmarkEvalSummary["metric_config"]): number {
+  const fallback = 0.05
+  if (!metricConfig) return fallback
+  // `unit` takes precedence over the alternative `metric_unit` spelling.
+  const unit =
+    (metricConfig as { unit?: string; metric_unit?: string }).unit ??
+    (metricConfig as { metric_unit?: string }).metric_unit
+  const scoreType = (metricConfig as { score_type?: string }).score_type
+  if (unit === "proportion" || scoreType === "continuous_normalized") return fallback
+  if (unit === "percent") return 5.0
+  const lower = metricConfig.min_score
+  const upper = metricConfig.max_score
+  if (typeof lower !== "number" || typeof upper !== "number") return fallback
+  return upper > lower ? 0.05 * (upper - lower) : fallback
+}
+/**
+ * Median of a list of numbers; the input array is left untouched.
+ *
+ * @param values Numbers to summarise.
+ * @returns The middle value for an odd count, the mean of the two middle
+ *   values for an even count, or `NaN` for an empty list.
+ */
+function median(values: number[]): number {
+  const count = values.length
+  if (count === 0) return Number.NaN
+  const ordered = values.slice().sort((left, right) => left - right)
+  const upperMid = Math.floor(count / 2)
+  if (count % 2 === 1) return ordered[upperMid]
+  return (ordered[upperMid - 1] + ordered[upperMid]) / 2
+}
+/**
+ * Comparability signal: among (model, metric) groups with overlapping
+ * reports, the share whose scores agree within the metric's threshold.
+ *
+ * Two independent checks run on every group with at least two finite
+ * scores:
+ * - Variant divergence (spec §6.1): eligible when any field in
+ *   `COMPARABILITY_COMPARE_FIELDS` differs between reports; divergent when
+ *   the max-min score spread exceeds the threshold.
+ * - Cross-party divergence (spec §6.2): eligible when at least two named
+ *   organisations reported; divergent when the spread of the per-org
+ *   median scores exceeds the threshold.
+ *
+ * A group can be eligible for both checks, in which case it counts once
+ * in each.
+ *
+ * @param summary Benchmark summary whose `model_results` are compared.
+ * @returns The agreement rate as a percentage with a headline and a
+ *   per-check breakdown, or a "—" placeholder when there are no results
+ *   or no group is eligible for either check.
+ */
+function deriveComparability(summary: BenchmarkEvalSummary): DerivedSignal {
+  const triples = summary.model_results ?? []
+  if (triples.length === 0) {
+    return { statValue: "—", statUnit: "", headline: "No reported scores yet.", detail: "" }
+  }
+  const threshold = computeThreshold(summary.metric_config)
+  // Group triples by (model_id, metric_path); results without a finite score are ignored.
+  const groups = new Map<
+    string,
+    Array<{ score: number; args: Record<string, unknown>; org: string | null }>
+  >()
+  for (const t of triples) {
+    const score = t.score_details?.score
+    if (typeof score !== "number" || !Number.isFinite(score)) continue
+    const key = `${modelKeyForResult(t)}::${metricKeyForResult(t)}`
+    const args = getGenerationArgs(t) ?? {}
+    const entry = { score, args, org: readSourceOrg(t) }
+    const list = groups.get(key)
+    if (list) list.push(entry)
+    else groups.set(key, [entry])
+  }
+  // Groups with two or more reports, whether or not they end up eligible
+  // for a check; reported when nothing is comparable.
+  let multiReportGroups = 0
+  let variantEligible = 0
+  let variantDivergent = 0
+  let crossPartyEligible = 0
+  let crossPartyDivergent = 0
+  for (const list of groups.values()) {
+    if (list.length < 2) continue
+    multiReportGroups++
+    // Variant divergence — same group, different setups (spec §6.1).
+    const setupValueSets = new Map<string, Set<string>>()
+    for (const entry of list) {
+      for (const f of COMPARABILITY_COMPARE_FIELDS) {
+        // A missing field is normalised to null so "absent" compares equal to "absent".
+        const valKey = JSON.stringify(entry.args[f] ?? null)
+        let set = setupValueSets.get(f)
+        if (!set) {
+          set = new Set()
+          setupValueSets.set(f, set)
+        }
+        set.add(valKey)
+      }
+    }
+    const setupsDiffer = Array.from(setupValueSets.values()).some((s) => s.size > 1)
+    if (setupsDiffer) {
+      variantEligible++
+      const scores = list.map((e) => e.score)
+      const divergence = Math.max(...scores) - Math.min(...scores)
+      if (divergence > threshold) variantDivergent++
+    }
+    // Cross-party divergence — same group, different orgs (spec §6.2).
+    const byOrg = new Map<string, number[]>()
+    for (const entry of list) {
+      if (!entry.org) continue
+      const arr = byOrg.get(entry.org)
+      if (arr) arr.push(entry.score)
+      else byOrg.set(entry.org, [entry.score])
+    }
+    if (byOrg.size >= 2) {
+      crossPartyEligible++
+      // Each organisation is represented by the median of its own reports.
+      const orgScores = Array.from(byOrg.values()).map((s) => median(s))
+      const divergence = Math.max(...orgScores) - Math.min(...orgScores)
+      if (divergence > threshold) crossPartyDivergent++
+    }
+  }
+  const totalEligible = variantEligible + crossPartyEligible
+  if (totalEligible === 0) {
+    return {
+      statValue: "—",
+      statUnit: "",
+      headline: "Not enough overlapping reports to compare.",
+      // Show the real count: groups can hold several reports that share one
+      // setup and one org, so "0 multi-report" is not always true here.
+      detail: `${groups.size} (model, metric) groups · ${multiReportGroups} multi-report`,
+    }
+  }
+  const totalDivergent = variantDivergent + crossPartyDivergent
+  const agreementRate = (totalEligible - totalDivergent) / totalEligible
+  const detailBits: string[] = []
+  if (variantEligible > 0) {
+    detailBits.push(
+      `variant ${variantEligible - variantDivergent}/${variantEligible} agree`,
+    )
+  }
+  if (crossPartyEligible > 0) {
+    detailBits.push(
+      `cross-party ${crossPartyEligible - crossPartyDivergent}/${crossPartyEligible} agree`,
+    )
+  }
+  detailBits.push(`threshold ±${formatNumber(threshold)}`)
+  const headline =
+    totalDivergent === 0
+      ? "Reports that are directly comparable agree within threshold."
+      : totalDivergent === totalEligible
+      ? "Every comparable report disagrees beyond threshold."
+      : `${totalEligible - totalDivergent} of ${totalEligible} comparable reports agree.`
+  return {
+    statValue: pctNum(agreementRate),
+    statUnit: "%",
+    headline,
+    detail: detailBits.join(" · "),
+  }
+}
+// ──────────────────────────────────────────────────────────────────────────
+// Helpers
+// ──────────────────────────────────────────────────────────────────────────
+/**
+ * Tells whether a field carries a meaningful value.
+ *
+ * - `null` / `undefined`: not populated.
+ * - strings: populated when non-blank after trimming.
+ * - numbers: populated unless `NaN`. Zero is a real value — `temperature: 0`
+ *   is a documented setting, not a missing one.
+ * - arrays: populated when non-empty.
+ * - other objects: populated when they have at least one own enumerable key.
+ * - anything else (including `false`): populated, because a value was supplied.
+ *
+ * @param value Any field value read from a card or a generation config.
+ * @returns `true` when the value counts as documented.
+ */
+function isPopulated(value: unknown): boolean {
+  if (value == null) return false
+  if (typeof value === "string") return value.trim().length > 0
+  // Truthiness would misreport 0 as missing, so numbers are checked explicitly.
+  if (typeof value === "number") return !Number.isNaN(value)
+  if (Array.isArray(value)) return value.length > 0
+  if (typeof value === "object") return Object.keys(value as Record<string, unknown>).length > 0
+  return true
+}
+/**
+ * Formats a 0-1 proportion as a bare percentage number (no "%" sign).
+ *
+ * @param value Proportion to format.
+ * @returns "—" for null, undefined or non-finite input, "0" for values at
+ *   or below zero, "<1" for positive values under 1%, otherwise the
+ *   percentage rounded to a whole number.
+ */
+function pctNum(value: number | null | undefined): string {
+  if (value == null || !Number.isFinite(value)) return "—"
+  if (value <= 0) return "0"
+  // Keep tiny non-zero shares visibly distinct from an exact zero.
+  return value < 0.01 ? "<1" : String(Math.round(value * 100))
+}
+/**
+ * Formats a 0-1 proportion as a percentage string with a "%" sign.
+ *
+ * @param value Proportion to format.
+ * @returns "—" for null, undefined or non-finite input, "0%" for values at
+ *   or below zero, "<1%" for positive values under 1%, otherwise the
+ *   percentage rounded to a whole number followed by "%".
+ */
+function formatPct(value: number | null | undefined): string {
+  if (value == null || !Number.isFinite(value)) return "—"
+  // Clamp at zero, as pctNum does; a negative share would otherwise render as "<1%".
+  if (value <= 0) return "0%"
+  if (value < 0.01) return "<1%"
+  return `${Math.round(value * 100)}%`
+}
+/**
+ * Formats a number compactly, with precision depending on its magnitude.
+ *
+ * @param value Number to format.
+ * @returns "—" for non-finite input; no decimals at 100 and above; two
+ *   decimals from 1 up to 100; otherwise up to three decimals with
+ *   trailing zeros (and a dangling decimal point) removed.
+ */
+function formatNumber(value: number): string {
+  if (!Number.isFinite(value)) return "—"
+  const decimals = value >= 100 ? 0 : value >= 1 ? 2 : 3
+  const fixed = value.toFixed(decimals)
+  if (decimals !== 3) return fixed
+  // Only the three-decimal form is trimmed: "0.050" -> "0.05", "0.000" -> "0".
+  return fixed.replace(/0+$/, "").replace(/\.$/, "")
+}
+/**
+ * One compact row for a single signal.
+ *
+ * The layout is deliberately small so it can sit next to the Card Quality
+ * Notes box instead of dominating the page like the corpus dashboard's
+ * tile grid. The first line holds the glyph, the signal name and the
+ * statistic; the second holds the headline plus optional detail, clamped
+ * to two lines. The signal's "ask" question goes on the `title`
+ * attribute, so it is available on hover without using vertical space.
+ *
+ * @param id Which signal this row represents.
+ * @param statValue Headline statistic as text.
+ * @param statUnit Unit shown after the statistic; hidden when empty.
+ * @param headline One-sentence summary.
+ * @param detail Secondary text appended after the headline; hidden when empty.
+ */
+function SignalRow({
+  id,
+  statValue,
+  statUnit,
+  headline,
+  detail,
+}: {
+  id: SignalId
+} & DerivedSignal) {
+  const glyphBadge = (
+    <span
+      className={`sig-glyph sig-${id}`}
+      style={{ width: 22, height: 22, fontSize: "0.7rem", flexShrink: 0 }}
+    >
+      <span>{SIGNAL_GLYPHS[id]}</span>
+    </span>
+  )
+  const nameLabel = (
+    <span
+      className="font-mono uppercase"
+      style={{
+        fontSize: 10,
+        letterSpacing: "0.14em",
+        color: "var(--fg-muted)",
+        flexShrink: 0,
+      }}
+    >
+      {SIGNAL_NAMES[id]}
+    </span>
+  )
+  const statReadout = (
+    <span
+      className="ml-auto font-mono tabular-nums"
+      style={{ fontSize: 16, fontWeight: 600, color: "var(--fg)" }}
+    >
+      {statValue}
+      {statUnit && (
+        <span style={{ fontSize: 10, color: "var(--fg-subtle)", marginLeft: 2 }}>
+          {statUnit}
+        </span>
+      )}
+    </span>
+  )
+  return (
+    <div className="min-w-0" title={SIGNAL_ASKS[id]}>
+      <div className="flex items-center gap-2">
+        {glyphBadge}
+        {nameLabel}
+        {statReadout}
+      </div>
+      {/* Headline and detail share one block clamped to two lines. */}
+      <div
+        className="mt-1"
+        style={{
+          fontSize: 11,
+          lineHeight: 1.4,
+          color: "var(--fg-muted)",
+          display: "-webkit-box",
+          WebkitLineClamp: 2,
+          WebkitBoxOrient: "vertical",
+          overflow: "hidden",
+        }}
+      >
+        {headline}
+        {detail && (
+          <span style={{ color: "var(--fg-subtle)" }}>
+            {" · "}
+            {detail}
+          </span>
+        )}
+      </div>
+    </div>
+  )
+}

components/signals/corpus-dashboard.tsx CHANGED Viewed

@@ -20,7 +20,7 @@ import {
   formatPercent,
 } from "./signal-utils"
-const CATEGORY_ORDER = ["agentic", "general", "knowledge", "reasoning", "safety", "other"]
 const SOURCE_COLORS: Record<string, string> = {
   first_party: "bg-amber-500",
@@ -51,13 +51,21 @@ export function CorpusDashboard({
   }, [mode])
   const categoryKeys = useMemo(
-    () =>
-      CATEGORY_ORDER.filter((category) =>
-        aggregates.reproducibility.by_category[category] ||
-        aggregates.completeness.by_category[category] ||
-        aggregates.provenance.by_category[category] ||
-        aggregates.comparability.by_category[category]
-      ),
     [aggregates]
   )
@@ -190,25 +198,14 @@ function CompletenessSection({
       icon={<ClipboardCheck className="h-5 w-5" />}
       title="Reporting Completeness"
       subtitle="How much benchmark documentation is populated."
-      headline={formatPercent(block.completeness_score_mean)}
-      headlineLabel={`Median ${formatPercent(block.completeness_score_median)} across ${block.total_benchmarks.toLocaleString()} benchmarks`}
     >
       {scores.length > 0 && <Histogram scores={scores} />}
-      <div className="mt-4 grid gap-2">
-        {Object.entries(block.per_field_population).slice(0, 10).map(([field, value]) => (
-          <div key={field} className="rounded-xl border border-border/60 bg-background px-3 py-2">
-            <div className="flex items-start justify-between gap-3 text-sm">
-              <span className="font-medium">{formatFieldLabel(field)}</span>
-              <span className="shrink-0 tabular-nums text-muted-foreground">
-                {formatPercent(value.mean_score)}
-              </span>
-            </div>
-            <div className="mt-2 grid gap-1.5">
-              <MetricBar label="Any data" value={value.populated_rate} compact />
-              <MetricBar label="Fully populated" value={value.fully_populated_rate} compact />
-            </div>
-          </div>
-        ))}
       </div>
     </DashboardSection>
   )
@@ -217,14 +214,16 @@ function CompletenessSection({
 function ProvenanceSection({ block }: { block: ProvenanceCorpusBlock }) {
   const distribution = block.source_type_distribution
   const total = Object.values(distribution).reduce((sum, value) => sum + value, 0)
   return (
     <DashboardSection
       icon={<BarChart3 className="h-5 w-5" />}
       title="Provenance"
       subtitle="Who reported the scores, and whether groups have multiple sources."
-      headline={formatPercent(block.multi_source_rate)}
-      headlineLabel="of (model, benchmark, metric) groups have multiple reporting sources"
     >
       <div className="overflow-hidden rounded-full border border-border/70 bg-muted/30">
         <div className="flex h-4 w-full">
@@ -240,34 +239,40 @@ function ProvenanceSection({ block }: { block: ProvenanceCorpusBlock }) {
       </div>
       <div className="mt-3 grid gap-2 sm:grid-cols-2">
-        <RatioTile label="Multi-source groups" value={block.multi_source_rate} count={block.multi_source_groups} />
-        <RatioTile label="First-party only groups" value={block.first_party_only_rate} count={block.first_party_only_groups} />
       </div>
     </DashboardSection>
   )
 }
 function ComparabilitySection({ block }: { block: ComparabilityCorpusBlock }) {
   return (
     <DashboardSection
       icon={<GitCompareArrows className="h-5 w-5" />}
       title="Comparability"
       subtitle="Eligible groups where scores diverge across setups or reporting organizations."
-      headline={formatNullableRate(block.variant_divergence_rate)}
-      headlineLabel={`${block.variant_divergent_groups.toLocaleString()} of ${block.variant_eligible_groups.toLocaleString()} setup-eligible groups diverge`}
     >
       <div className="grid gap-3 md:grid-cols-2">
         <ComparabilityRateCard
           title="Variant divergence"
-          rate={block.variant_divergence_rate}
-          eligible={block.variant_eligible_groups}
-          divergent={block.variant_divergent_groups}
         />
         <ComparabilityRateCard
           title="Cross-party divergence"
-          rate={block.cross_party_divergence_rate}
-          eligible={block.cross_party_eligible_groups}
-          divergent={block.cross_party_divergent_groups}
         />
       </div>
     </DashboardSection>
@@ -288,6 +293,15 @@ function CategoryPanel({
   comparability?: ComparabilityCorpusBlock
 }) {
   const categoryLabel = `${category.charAt(0).toUpperCase()}${category.slice(1)}`
   return (
     <section className="rounded-2xl border border-border/70 bg-card p-4 shadow-sm">
@@ -297,11 +311,11 @@ function CategoryPanel({
       </div>
       <div className="grid gap-3 sm:grid-cols-2">
         <MiniMetric label="Reproducibility gaps" value={formatPercent(reproducibility?.reproducibility_gap_rate)} />
-        <MiniMetric label="Documentation mean" value={formatPercent(completeness?.completeness_score_mean)} />
-        <MiniMetric label="Multi-source groups" value={formatPercent(provenance?.multi_source_rate)} />
-        <MiniMetric label="Variant divergence" value={formatNullableRate(comparability?.variant_divergence_rate)} />
       </div>
-      {comparability?.cross_party_divergence_rate == null && (
         <div className="mt-3 rounded-xl border border-dashed border-border/70 bg-muted/10 px-3 py-2 text-sm text-muted-foreground">
           Cross-party divergence: N/A - not enough multi-org coverage.
         </div>
@@ -411,7 +425,7 @@ function RatioTile({ label, value, count }: { label: string; value: number | nul
       <div className="text-sm font-medium">{label}</div>
       <div className="mt-1 flex items-baseline justify-between gap-2">
         <span className="text-xl font-semibold tabular-nums">{formatPercent(value)}</span>
-        <span className="text-xs text-muted-foreground">{count.toLocaleString()} groups</span>
       </div>
     </div>
   )
@@ -463,6 +477,11 @@ function formatNullableRate(value: number | null | undefined) {
   return value == null ? "N/A" : formatPercent(value)
 }
 function formatGeneratedDate(value: string) {
   const date = new Date(value)
   if (Number.isNaN(date.getTime())) {

   formatPercent,
 } from "./signal-utils"
+const CATEGORY_ORDER = ["Agentic", "General", "Knowledge", "Reasoning", "Safety", "Other"]
 const SOURCE_COLORS: Record<string, string> = {
   first_party: "bg-amber-500",
   }, [mode])
   const categoryKeys = useMemo(
+    () => {
+      const available = new Set([
+        ...Object.keys(aggregates.reproducibility.by_category),
+        ...Object.keys(aggregates.completeness.by_category),
+        ...Object.keys(aggregates.provenance.by_category),
+        ...Object.keys(aggregates.comparability.by_category),
+      ])
+      return [
+        ...CATEGORY_ORDER.filter((category) => available.has(category)),
+        ...Array.from(available)
+          .filter((category) => !CATEGORY_ORDER.includes(category))
+          .sort((a, b) => a.localeCompare(b)),
+      ]
+    },
     [aggregates]
   )
       icon={<ClipboardCheck className="h-5 w-5" />}
       title="Reporting Completeness"
       subtitle="How much benchmark documentation is populated."
+      headline={formatPercent(block.completeness_avg)}
+      headlineLabel={`Range ${formatPercent(block.completeness_min)} to ${formatPercent(block.completeness_max)} across ${block.total_triples.toLocaleString()} reported score triples`}
     >
       {scores.length > 0 && <Histogram scores={scores} />}
+      <div className="mt-4 grid gap-2 sm:grid-cols-3">
+        <MiniMetric label="Minimum" value={formatPercent(block.completeness_min)} />
+        <MiniMetric label="Average" value={formatPercent(block.completeness_avg)} />
+        <MiniMetric label="Maximum" value={formatPercent(block.completeness_max)} />
       </div>
     </DashboardSection>
   )
 function ProvenanceSection({ block }: { block: ProvenanceCorpusBlock }) {
   const distribution = block.source_type_distribution
   const total = Object.values(distribution).reduce((sum, value) => sum + value, 0)
+  const multiSourceRate = rate(block.multi_source_triples, block.total_triples)
+  const firstPartyOnlyRate = rate(block.first_party_only_triples, block.total_triples)
   return (
     <DashboardSection
       icon={<BarChart3 className="h-5 w-5" />}
       title="Provenance"
       subtitle="Who reported the scores, and whether groups have multiple sources."
+      headline={formatPercent(multiSourceRate)}
+      headlineLabel="of reported score triples have multiple reporting sources"
     >
       <div className="overflow-hidden rounded-full border border-border/70 bg-muted/30">
         <div className="flex h-4 w-full">
       </div>
       <div className="mt-3 grid gap-2 sm:grid-cols-2">
+        <RatioTile label="Multi-source triples" value={multiSourceRate} count={block.multi_source_triples} />
+        <RatioTile label="First-party only triples" value={firstPartyOnlyRate} count={block.first_party_only_triples} />
       </div>
     </DashboardSection>
   )
 }
 function ComparabilitySection({ block }: { block: ComparabilityCorpusBlock }) {
+  const variantRate = rate(block.variant_divergent_count, block.groups_with_variant_check)
+  const crossPartyRate = rate(
+    block.cross_party_divergent_count,
+    block.groups_with_cross_party_check
+  )
   return (
     <DashboardSection
       icon={<GitCompareArrows className="h-5 w-5" />}
       title="Comparability"
       subtitle="Eligible groups where scores diverge across setups or reporting organizations."
+      headline={formatNullableRate(variantRate)}
+      headlineLabel={`${block.variant_divergent_count.toLocaleString()} of ${block.groups_with_variant_check.toLocaleString()} setup-eligible groups diverge`}
     >
       <div className="grid gap-3 md:grid-cols-2">
         <ComparabilityRateCard
           title="Variant divergence"
+          rate={variantRate}
+          eligible={block.groups_with_variant_check}
+          divergent={block.variant_divergent_count}
         />
         <ComparabilityRateCard
           title="Cross-party divergence"
+          rate={crossPartyRate}
+          eligible={block.groups_with_cross_party_check}
+          divergent={block.cross_party_divergent_count}
         />
       </div>
     </DashboardSection>
   comparability?: ComparabilityCorpusBlock
 }) {
   const categoryLabel = `${category.charAt(0).toUpperCase()}${category.slice(1)}`
+  const multiSourceRate = rate(provenance?.multi_source_triples, provenance?.total_triples)
+  const variantRate = rate(
+    comparability?.variant_divergent_count,
+    comparability?.groups_with_variant_check
+  )
+  const crossPartyRate = rate(
+    comparability?.cross_party_divergent_count,
+    comparability?.groups_with_cross_party_check
+  )
   return (
     <section className="rounded-2xl border border-border/70 bg-card p-4 shadow-sm">
       </div>
       <div className="grid gap-3 sm:grid-cols-2">
         <MiniMetric label="Reproducibility gaps" value={formatPercent(reproducibility?.reproducibility_gap_rate)} />
+        <MiniMetric label="Documentation mean" value={formatPercent(completeness?.completeness_avg)} />
+        <MiniMetric label="Multi-source triples" value={formatPercent(multiSourceRate)} />
+        <MiniMetric label="Variant divergence" value={formatNullableRate(variantRate)} />
       </div>
+      {crossPartyRate == null && (
         <div className="mt-3 rounded-xl border border-dashed border-border/70 bg-muted/10 px-3 py-2 text-sm text-muted-foreground">
           Cross-party divergence: N/A - not enough multi-org coverage.
         </div>
       <div className="text-sm font-medium">{label}</div>
       <div className="mt-1 flex items-baseline justify-between gap-2">
         <span className="text-xl font-semibold tabular-nums">{formatPercent(value)}</span>
+        <span className="text-xs text-muted-foreground">{count.toLocaleString()} triples</span>
       </div>
     </div>
   )
   return value == null ? "N/A" : formatPercent(value)
 }
+function rate(numerator: number | null | undefined, denominator: number | null | undefined) {
+  if (numerator == null || denominator == null || denominator <= 0) return null
+  return numerator / denominator
+}
 function formatGeneratedDate(value: string) {
   const date = new Date(value)
   if (Number.isNaN(date.getTime())) {

components/signals/corpus-signals-strip.tsx CHANGED Viewed

@@ -39,8 +39,13 @@ export function CorpusSignalsStrip({
   const tpShare = totalReports > 0 ? prov.source_type_distribution.third_party / totalReports : 0
   const fpShare = totalReports > 0 ? prov.source_type_distribution.first_party / totalReports : 0
-  const cmpRate = cmp.variant_divergence_rate
-  const crossPartyAvailable = cmp.cross_party_eligible_groups > 0
   return (
     <div className="signals-grid">
@@ -58,29 +63,29 @@ export function CorpusSignalsStrip({
       />
       <SignalTile
         id="completeness"
-        statValue={pctNum(comp.completeness_score_mean)}
         statUnit="%"
-        headline={`mean across ${comp.total_benchmarks.toLocaleString()} benchmarks (median ${formatPct(comp.completeness_score_median)}).`}
-        detail="Source-provenance fields populate fully; preregistration fields are unmet."
         asks="Is the benchmark itself documented well enough to interpret a score on it?"
       />
       <SignalTile
         id="provenance"
-        statValue={pctNum(prov.multi_source_rate)}
         statUnit="%"
-        headline="of (model, benchmark) groups have reports from more than one party."
-        detail={`${formatPct(tpShare)} third-party, ${formatPct(fpShare)} first-party of ${totalReports.toLocaleString()} results.`}
         asks="Who reported this score, and have others reproduced it?"
       />
       <SignalTile
         id="comparability"
         statValue={pctNum(cmpRate)}
         statUnit="%"
-        headline={`of setup-eligible groups diverge across variants (${cmp.variant_divergent_groups.toLocaleString()} of ${cmp.variant_eligible_groups.toLocaleString()}).`}
         detail={
           crossPartyAvailable
-            ? `Cross-party divergence: ${formatPct(cmp.cross_party_divergence_rate)}.`
-            : "Cross-party divergence not yet computable — too few multi-org reports."
         }
         asks="Are scores on the same benchmark actually measuring the same thing?"
       />
@@ -154,6 +159,11 @@ function formatPct(value: number | null | undefined): string {
   return `${Math.round(value * 100)}%`
 }
 const FIELD_LABELS: Record<string, string> = {
   temperature: "temperature",
   max_tokens: "max tokens",

   const tpShare = totalReports > 0 ? prov.source_type_distribution.third_party / totalReports : 0
   const fpShare = totalReports > 0 ? prov.source_type_distribution.first_party / totalReports : 0
+  const multiSourceRate = rate(prov.multi_source_triples, prov.total_triples)
+  const cmpRate = rate(cmp.variant_divergent_count, cmp.groups_with_variant_check)
+  const crossPartyRate = rate(
+    cmp.cross_party_divergent_count,
+    cmp.groups_with_cross_party_check
+  )
+  const crossPartyAvailable = cmp.groups_with_cross_party_check > 0
   return (
     <div className="signals-grid">
       />
       <SignalTile
         id="completeness"
+        statValue={pctNum(comp.completeness_avg)}
         statUnit="%"
+        headline={`mean across ${comp.total_triples.toLocaleString()} reported score triples.`}
+        detail={`Observed range: ${formatPct(comp.completeness_min)} to ${formatPct(comp.completeness_max)}.`}
         asks="Is the benchmark itself documented well enough to interpret a score on it?"
       />
       <SignalTile
         id="provenance"
+        statValue={pctNum(multiSourceRate)}
         statUnit="%"
+        headline="of reported score triples have reports from more than one party."
+        detail={`${formatPct(tpShare)} third-party, ${formatPct(fpShare)} first-party of ${totalReports.toLocaleString()} triples.`}
         asks="Who reported this score, and have others reproduced it?"
       />
       <SignalTile
         id="comparability"
         statValue={pctNum(cmpRate)}
         statUnit="%"
+        headline={`of setup-eligible groups diverge across variants (${cmp.variant_divergent_count.toLocaleString()} of ${cmp.groups_with_variant_check.toLocaleString()}).`}
         detail={
           crossPartyAvailable
+            ? `Cross-party divergence: ${formatPct(crossPartyRate)}.`
+            : "Cross-party divergence not yet computable: too few multi-org reports."
         }
         asks="Are scores on the same benchmark actually measuring the same thing?"
       />
   return `${Math.round(value * 100)}%`
 }
+function rate(numerator: number | null | undefined, denominator: number | null | undefined) {
+  if (numerator == null || denominator == null || denominator <= 0) return null
+  return numerator / denominator
+}
 const FIELD_LABELS: Record<string, string> = {
   temperature: "temperature",
   max_tokens: "max tokens",

data/benchmarks.json DELETED Viewed

@@ -1,90 +0,0 @@
-[
-  {
-    "benchmark": "ace",
-    "model_count": 12
-  },
-  {
-    "benchmark": "apex-agents",
-    "model_count": 20
-  },
-  {
-    "benchmark": "apex-v1",
-    "model_count": 10
-  },
-  {
-    "benchmark": "appworld_test_normal",
-    "model_count": 3
-  },
-  {
-    "benchmark": "bfcl",
-    "model_count": 109
-  },
-  {
-    "benchmark": "browsecompplus",
-    "model_count": 3
-  },
-  {
-    "benchmark": "global-mmlu-lite",
-    "model_count": 27
-  },
-  {
-    "benchmark": "helm_capabilities",
-    "model_count": 61
-  },
-  {
-    "benchmark": "helm_classic",
-    "model_count": 67
-  },
-  {
-    "benchmark": "helm_instruct",
-    "model_count": 4
-  },
-  {
-    "benchmark": "helm_lite",
-    "model_count": 91
-  },
-  {
-    "benchmark": "helm_mmlu",
-    "model_count": 79
-  },
-  {
-    "benchmark": "hfopenllm_v2",
-    "model_count": 4493
-  },
-  {
-    "benchmark": "la_leaderboard",
-    "model_count": 5
-  },
-  {
-    "benchmark": "livecodebenchpro",
-    "model_count": 27
-  },
-  {
-    "benchmark": "reward-bench",
-    "model_count": 328
-  },
-  {
-    "benchmark": "swe-bench",
-    "model_count": 3
-  },
-  {
-    "benchmark": "tau-bench-2_airline",
-    "model_count": 3
-  },
-  {
-    "benchmark": "tau-bench-2_retail",
-    "model_count": 3
-  },
-  {
-    "benchmark": "tau-bench-2_telecom",
-    "model_count": 3
-  },
-  {
-    "benchmark": "terminal-bench-2.0",
-    "model_count": 37
-  },
-  {
-    "benchmark": "theory_of_mind",
-    "model_count": 1
-  }
-]

data/developers.json DELETED Viewed

@@ -1,3150 +0,0 @@
-[
-  {
-    "developer": "0-hero",
-    "model_count": 3
-  },
-  {
-    "developer": "01-ai",
-    "model_count": 20
-  },
-  {
-    "developer": "1-800-LLMs",
-    "model_count": 2
-  },
-  {
-    "developer": "1024m",
-    "model_count": 2
-  },
-  {
-    "developer": "152334H",
-    "model_count": 1
-  },
-  {
-    "developer": "1TuanPham",
-    "model_count": 2
-  },
-  {
-    "developer": "3rd-Degree-Burn",
-    "model_count": 4
-  },
-  {
-    "developer": "4season",
-    "model_count": 1
-  },
-  {
-    "developer": "aaditya",
-    "model_count": 1
-  },
-  {
-    "developer": "AALF",
-    "model_count": 4
-  },
-  {
-    "developer": "Aashraf995",
-    "model_count": 4
-  },
-  {
-    "developer": "abacusai",
-    "model_count": 10
-  },
-  {
-    "developer": "AbacusResearch",
-    "model_count": 1
-  },
-  {
-    "developer": "abhishek",
-    "model_count": 5
-  },
-  {
-    "developer": "abideen",
-    "model_count": 1
-  },
-  {
-    "developer": "adamo1139",
-    "model_count": 1
-  },
-  {
-    "developer": "adriszmar",
-    "model_count": 1
-  },
-  {
-    "developer": "AELLM",
-    "model_count": 2
-  },
-  {
-    "developer": "aevalone",
-    "model_count": 1
-  },
-  {
-    "developer": "agentlans",
-    "model_count": 9
-  },
-  {
-    "developer": "AGI-0",
-    "model_count": 3
-  },
-  {
-    "developer": "Ahdoot",
-    "model_count": 2
-  },
-  {
-    "developer": "Ahjeong",
-    "model_count": 2
-  },
-  {
-    "developer": "ahmeda335",
-    "model_count": 1
-  },
-  {
-    "developer": "AI-MO",
-    "model_count": 2
-  },
-  {
-    "developer": "AI-Sweden-Models",
-    "model_count": 2
-  },
-  {
-    "developer": "AI2",
-    "model_count": 7
-  },
-  {
-    "developer": "ai21",
-    "model_count": 12
-  },
-  {
-    "developer": "ai21labs",
-    "model_count": 1
-  },
-  {
-    "developer": "ai4bharat",
-    "model_count": 1
-  },
-  {
-    "developer": "AI4free",
-    "model_count": 2
-  },
-  {
-    "developer": "AicoresSecurity",
-    "model_count": 4
-  },
-  {
-    "developer": "AIDC-AI",
-    "model_count": 1
-  },
-  {
-    "developer": "aixonlab",
-    "model_count": 3
-  },
-  {
-    "developer": "akhadangi",
-    "model_count": 5
-  },
-  {
-    "developer": "akjindal53244",
-    "model_count": 1
-  },
-  {
-    "developer": "alcholjung",
-    "model_count": 1
-  },
-  {
-    "developer": "Alepach",
-    "model_count": 3
-  },
-  {
-    "developer": "aleph-alpha",
-    "model_count": 3
-  },
-  {
-    "developer": "AlephAlpha",
-    "model_count": 3
-  },
-  {
-    "developer": "Alibaba",
-    "model_count": 6
-  },
-  {
-    "developer": "Alibaba-NLP",
-    "model_count": 1
-  },
-  {
-    "developer": "aliyun",
-    "model_count": 1
-  },
-  {
-    "developer": "allenai",
-    "model_count": 162
-  },
-  {
-    "developer": "allknowingroger",
-    "model_count": 88
-  },
-  {
-    "developer": "allura-org",
-    "model_count": 9
-  },
-  {
-    "developer": "aloobun",
-    "model_count": 2
-  },
-  {
-    "developer": "alpindale",
-    "model_count": 2
-  },
-  {
-    "developer": "Alsebay",
-    "model_count": 1
-  },
-  {
-    "developer": "altomek",
-    "model_count": 1
-  },
-  {
-    "developer": "Amaorynho",
-    "model_count": 4
-  },
-  {
-    "developer": "amazon",
-    "model_count": 8
-  },
-  {
-    "developer": "amd",
-    "model_count": 1
-  },
-  {
-    "developer": "Amu",
-    "model_count": 2
-  },
-  {
-    "developer": "anakin87",
-    "model_count": 1
-  },
-  {
-    "developer": "anthracite-org",
-    "model_count": 12
-  },
-  {
-    "developer": "Anthropic",
-    "model_count": 34
-  },
-  {
-    "developer": "apple",
-    "model_count": 1
-  },
-  {
-    "developer": "applied-compute",
-    "model_count": 1
-  },
-  {
-    "developer": "appvoid",
-    "model_count": 2
-  },
-  {
-    "developer": "arcee-ai",
-    "model_count": 11
-  },
-  {
-    "developer": "argilla",
-    "model_count": 2
-  },
-  {
-    "developer": "argilla-warehouse",
-    "model_count": 1
-  },
-  {
-    "developer": "arisin",
-    "model_count": 1
-  },
-  {
-    "developer": "ark",
-    "model_count": 1
-  },
-  {
-    "developer": "ArliAI",
-    "model_count": 2
-  },
-  {
-    "developer": "arshiaafshani",
-    "model_count": 1
-  },
-  {
-    "developer": "Arthur-LAGACHERIE",
-    "model_count": 1
-  },
-  {
-    "developer": "Artples",
-    "model_count": 2
-  },
-  {
-    "developer": "Aryanne",
-    "model_count": 3
-  },
-  {
-    "developer": "asharsha30",
-    "model_count": 1
-  },
-  {
-    "developer": "ashercn97",
-    "model_count": 2
-  },
-  {
-    "developer": "assskelad",
-    "model_count": 1
-  },
-  {
-    "developer": "AtAndDev",
-    "model_count": 1
-  },
-  {
-    "developer": "Ateron",
-    "model_count": 3
-  },
-  {
-    "developer": "athirdpath",
-    "model_count": 1
-  },
-  {
-    "developer": "AtlaAI",
-    "model_count": 2
-  },
-  {
-    "developer": "AuraIndustries",
-    "model_count": 4
-  },
-  {
-    "developer": "Aurel9",
-    "model_count": 1
-  },
-  {
-    "developer": "automerger",
-    "model_count": 1
-  },
-  {
-    "developer": "avemio",
-    "model_count": 1
-  },
-  {
-    "developer": "awnr",
-    "model_count": 5
-  },
-  {
-    "developer": "aws-prototyping",
-    "model_count": 1
-  },
-  {
-    "developer": "axolotl-ai-co",
-    "model_count": 1
-  },
-  {
-    "developer": "Ayush-Singh",
-    "model_count": 1
-  },
-  {
-    "developer": "Azure99",
-    "model_count": 6
-  },
-  {
-    "developer": "Ba2han",
-    "model_count": 1
-  },
-  {
-    "developer": "BAAI",
-    "model_count": 14
-  },
-  {
-    "developer": "baconnier",
-    "model_count": 2
-  },
-  {
-    "developer": "baebee",
-    "model_count": 3
-  },
-  {
-    "developer": "bamec66557",
-    "model_count": 27
-  },
-  {
-    "developer": "Baptiste-HUVELLE-10",
-    "model_count": 1
-  },
-  {
-    "developer": "BEE-spoke-data",
-    "model_count": 9
-  },
-  {
-    "developer": "belztjti",
-    "model_count": 2
-  },
-  {
-    "developer": "BenevolenceMessiah",
-    "model_count": 2
-  },
-  {
-    "developer": "benhaotang",
-    "model_count": 1
-  },
-  {
-    "developer": "beomi",
-    "model_count": 1
-  },
-  {
-    "developer": "beowolx",
-    "model_count": 1
-  },
-  {
-    "developer": "berkeley-nest",
-    "model_count": 2
-  },
-  {
-    "developer": "bfuzzy1",
-    "model_count": 7
-  },
-  {
-    "developer": "bhuvneshsaini",
-    "model_count": 1
-  },
-  {
-    "developer": "bigcode",
-    "model_count": 3
-  },
-  {
-    "developer": "bigscience",
-    "model_count": 7
-  },
-  {
-    "developer": "bittensor",
-    "model_count": 1
-  },
-  {
-    "developer": "BlackBeenie",
-    "model_count": 9
-  },
-  {
-    "developer": "Bllossom",
-    "model_count": 1
-  },
-  {
-    "developer": "bluuwhale",
-    "model_count": 1
-  },
-  {
-    "developer": "BoltMonkey",
-    "model_count": 3
-  },
-  {
-    "developer": "bond005",
-    "model_count": 1
-  },
-  {
-    "developer": "bosonai",
-    "model_count": 1
-  },
-  {
-    "developer": "braindao",
-    "model_count": 17
-  },
-  {
-    "developer": "BrainWave-ML",
-    "model_count": 1
-  },
-  {
-    "developer": "BramVanroy",
-    "model_count": 4
-  },
-  {
-    "developer": "brgx53",
-    "model_count": 6
-  },
-  {
-    "developer": "BSC-LT",
-    "model_count": 2
-  },
-  {
-    "developer": "bunnycore",
-    "model_count": 85
-  },
-  {
-    "developer": "byroneverson",
-    "model_count": 3
-  },
-  {
-    "developer": "ByteDance",
-    "model_count": 1
-  },
-  {
-    "developer": "c10x",
-    "model_count": 2
-  },
-  {
-    "developer": "CarrotAI",
-    "model_count": 2
-  },
-  {
-    "developer": "carsenk",
-    "model_count": 2
-  },
-  {
-    "developer": "Casual-Autopsy",
-    "model_count": 1
-  },
-  {
-    "developer": "cat-searcher",
-    "model_count": 2
-  },
-  {
-    "developer": "CausalLM",
-    "model_count": 3
-  },
-  {
-    "developer": "cckm",
-    "model_count": 1
-  },
-  {
-    "developer": "cgato",
-    "model_count": 1
-  },
-  {
-    "developer": "Changgil",
-    "model_count": 2
-  },
-  {
-    "developer": "chargoddard",
-    "model_count": 1
-  },
-  {
-    "developer": "chujiezheng",
-    "model_count": 2
-  },
-  {
-    "developer": "CIR-AMS",
-    "model_count": 1
-  },
-  {
-    "developer": "cjvt",
-    "model_count": 1
-  },
-  {
-    "developer": "ClaudioItaly",
-    "model_count": 4
-  },
-  {
-    "developer": "cloudyu",
-    "model_count": 7
-  },
-  {
-    "developer": "cluebbers",
-    "model_count": 3
-  },
-  {
-    "developer": "cognitivecomputations",
-    "model_count": 17
-  },
-  {
-    "developer": "cohere",
-    "model_count": 18
-  },
-  {
-    "developer": "CohereForAI",
-    "model_count": 8
-  },
-  {
-    "developer": "collaiborateorg",
-    "model_count": 1
-  },
-  {
-    "developer": "Columbia-NLP",
-    "model_count": 6
-  },
-  {
-    "developer": "CombinHorizon",
-    "model_count": 6
-  },
-  {
-    "developer": "ContactDoctor",
-    "model_count": 2
-  },
-  {
-    "developer": "ContextualAI",
-    "model_count": 16
-  },
-  {
-    "developer": "CoolSpring",
-    "model_count": 3
-  },
-  {
-    "developer": "Corianas",
-    "model_count": 3
-  },
-  {
-    "developer": "CortexLM",
-    "model_count": 1
-  },
-  {
-    "developer": "cpayne1303",
-    "model_count": 4
-  },
-  {
-    "developer": "Cran-May",
-    "model_count": 7
-  },
-  {
-    "developer": "CreitinGameplays",
-    "model_count": 1
-  },
-  {
-    "developer": "crestf411",
-    "model_count": 1
-  },
-  {
-    "developer": "cstr",
-    "model_count": 1
-  },
-  {
-    "developer": "CultriX",
-    "model_count": 32
-  },
-  {
-    "developer": "cyberagent",
-    "model_count": 1
-  },
-  {
-    "developer": "CYFRAGOVPL",
-    "model_count": 6
-  },
-  {
-    "developer": "Daemontatox",
-    "model_count": 32
-  },
-  {
-    "developer": "Dampfinchen",
-    "model_count": 1
-  },
-  {
-    "developer": "Danielbrdz",
-    "model_count": 7
-  },
-  {
-    "developer": "Dans-DiscountModels",
-    "model_count": 9
-  },
-  {
-    "developer": "darkc0de",
-    "model_count": 3
-  },
-  {
-    "developer": "Darkknight535",
-    "model_count": 1
-  },
-  {
-    "developer": "databricks",
-    "model_count": 6
-  },
-  {
-    "developer": "Databricks-Mosaic-Research",
-    "model_count": 1
-  },
-  {
-    "developer": "DavidAU",
-    "model_count": 25
-  },
-  {
-    "developer": "davidkim205",
-    "model_count": 2
-  },
-  {
-    "developer": "Davidsv",
-    "model_count": 1
-  },
-  {
-    "developer": "DavieLion",
-    "model_count": 5
-  },
-  {
-    "developer": "DebateLabKIT",
-    "model_count": 1
-  },
-  {
-    "developer": "Deci",
-    "model_count": 2
-  },
-  {
-    "developer": "DeepAutoAI",
-    "model_count": 12
-  },
-  {
-    "developer": "DeepMount00",
-    "model_count": 13
-  },
-  {
-    "developer": "DeepSeek",
-    "model_count": 9
-  },
-  {
-    "developer": "deepseek-ai",
-    "model_count": 13
-  },
-  {
-    "developer": "Delta-Vector",
-    "model_count": 7
-  },
-  {
-    "developer": "DevQuasar",
-    "model_count": 1
-  },
-  {
-    "developer": "dfurman",
-    "model_count": 4
-  },
-  {
-    "developer": "dicta-il",
-    "model_count": 2
-  },
-  {
-    "developer": "distilbert",
-    "model_count": 1
-  },
-  {
-    "developer": "divyanshukunwar",
-    "model_count": 1
-  },
-  {
-    "developer": "djuna",
-    "model_count": 15
-  },
-  {
-    "developer": "djuna-test-lab",
-    "model_count": 2
-  },
-  {
-    "developer": "dnhkng",
-    "model_count": 10
-  },
-  {
-    "developer": "Dongwei",
-    "model_count": 1
-  },
-  {
-    "developer": "DoppelReflEx",
-    "model_count": 29
-  },
-  {
-    "developer": "DreadPoor",
-    "model_count": 119
-  },
-  {
-    "developer": "dreamgen",
-    "model_count": 1
-  },
-  {
-    "developer": "DRXD1000",
-    "model_count": 2
-  },
-  {
-    "developer": "DUAL-GPO",
-    "model_count": 1
-  },
-  {
-    "developer": "dustinwloring1988",
-    "model_count": 7
-  },
-  {
-    "developer": "duyhv1411",
-    "model_count": 2
-  },
-  {
-    "developer": "dwikitheduck",
-    "model_count": 6
-  },
-  {
-    "developer": "dzakwan",
-    "model_count": 1
-  },
-  {
-    "developer": "DZgas",
-    "model_count": 1
-  },
-  {
-    "developer": "ECE-ILAB-PRYMMAL",
-    "model_count": 1
-  },
-  {
-    "developer": "Edgerunners",
-    "model_count": 1
-  },
-  {
-    "developer": "ehristoforu",
-    "model_count": 36
-  },
-  {
-    "developer": "EleutherAI",
-    "model_count": 12
-  },
-  {
-    "developer": "elinas",
-    "model_count": 1
-  },
-  {
-    "developer": "ell44ot",
-    "model_count": 1
-  },
-  {
-    "developer": "Enno-Ai",
-    "model_count": 4
-  },
-  {
-    "developer": "EnnoAi",
-    "model_count": 2
-  },
-  {
-    "developer": "Epiculous",
-    "model_count": 4
-  },
-  {
-    "developer": "EpistemeAI",
-    "model_count": 47
-  },
-  {
-    "developer": "EpistemeAI2",
-    "model_count": 15
-  },
-  {
-    "developer": "Eric111",
-    "model_count": 2
-  },
-  {
-    "developer": "Etherll",
-    "model_count": 8
-  },
-  {
-    "developer": "euclaise",
-    "model_count": 1
-  },
-  {
-    "developer": "Eurdem",
-    "model_count": 1
-  },
-  {
-    "developer": "EVA-UNIT-01",
-    "model_count": 2
-  },
-  {
-    "developer": "eworojoshua",
-    "model_count": 1
-  },
-  {
-    "developer": "ewre324",
-    "model_count": 4
-  },
-  {
-    "developer": "experiment-llm",
-    "model_count": 1
-  },
-  {
-    "developer": "facebook",
-    "model_count": 4
-  },
-  {
-    "developer": "failspy",
-    "model_count": 6
-  },
-  {
-    "developer": "FallenMerick",
-    "model_count": 1
-  },
-  {
-    "developer": "fblgit",
-    "model_count": 11
-  },
-  {
-    "developer": "Felladrin",
-    "model_count": 2
-  },
-  {
-    "developer": "fhai50032",
-    "model_count": 2
-  },
-  {
-    "developer": "FINGU-AI",
-    "model_count": 7
-  },
-  {
-    "developer": "flammenai",
-    "model_count": 6
-  },
-  {
-    "developer": "FlofloB",
-    "model_count": 27
-  },
-  {
-    "developer": "fluently-lm",
-    "model_count": 3
-  },
-  {
-    "developer": "fluently-sets",
-    "model_count": 2
-  },
-  {
-    "developer": "formulae",
-    "model_count": 10
-  },
-  {
-    "developer": "frameai",
-    "model_count": 1
-  },
-  {
-    "developer": "freewheelin",
-    "model_count": 4
-  },
-  {
-    "developer": "FuJhen",
-    "model_count": 4
-  },
-  {
-    "developer": "fulim",
-    "model_count": 1
-  },
-  {
-    "developer": "FuseAI",
-    "model_count": 4
-  },
-  {
-    "developer": "gabrielmbmb",
-    "model_count": 1
-  },
-  {
-    "developer": "GalrionSoftworks",
-    "model_count": 2
-  },
-  {
-    "developer": "gaverfraxz",
-    "model_count": 2
-  },
-  {
-    "developer": "gbueno86",
-    "model_count": 2
-  },
-  {
-    "developer": "general-preference",
-    "model_count": 2
-  },
-  {
-    "developer": "GenVRadmin",
-    "model_count": 4
-  },
-  {
-    "developer": "ghost-x",
-    "model_count": 1
-  },
-  {
-    "developer": "glaiveai",
-    "model_count": 1
-  },
-  {
-    "developer": "gmonsoon",
-    "model_count": 5
-  },
-  {
-    "developer": "godlikehhd",
-    "model_count": 26
-  },
-  {
-    "developer": "Goekdeniz-Guelmez",
-    "model_count": 10
-  },
-  {
-    "developer": "Google",
-    "model_count": 78
-  },
-  {
-    "developer": "GoToCompany",
-    "model_count": 2
-  },
-  {
-    "developer": "goulue5",
-    "model_count": 1
-  },
-  {
-    "developer": "gradientai",
-    "model_count": 1
-  },
-  {
-    "developer": "GreenNode",
-    "model_count": 1
-  },
-  {
-    "developer": "grimjim",
-    "model_count": 25
-  },
-  {
-    "developer": "GritLM",
-    "model_count": 2
-  },
-  {
-    "developer": "Groq",
-    "model_count": 1
-  },
-  {
-    "developer": "Gryphe",
-    "model_count": 5
-  },
-  {
-    "developer": "GuilhermeNaturaUmana",
-    "model_count": 1
-  },
-  {
-    "developer": "Gunulhona",
-    "model_count": 2
-  },
-  {
-    "developer": "gupta-tanish",
-    "model_count": 1
-  },
-  {
-    "developer": "gz987",
-    "model_count": 4
-  },
-  {
-    "developer": "h2oai",
-    "model_count": 5
-  },
-  {
-    "developer": "haoranxu",
-    "model_count": 3
-  },
-  {
-    "developer": "HarbingerX",
-    "model_count": 2
-  },
-  {
-    "developer": "Hastagaras",
-    "model_count": 3
-  },
-  {
-    "developer": "hatemmahmoud",
-    "model_count": 1
-  },
-  {
-    "developer": "HelpingAI",
-    "model_count": 4
-  },
-  {
-    "developer": "hendrydong",
-    "model_count": 1
-  },
-  {
-    "developer": "HeraiHench",
-    "model_count": 4
-  },
-  {
-    "developer": "HFXM",
-    "model_count": 1
-  },
-  {
-    "developer": "HiroseKoichi",
-    "model_count": 1
-  },
-  {
-    "developer": "HoangHa",
-    "model_count": 1
-  },
-  {
-    "developer": "hon9kon9ize",
-    "model_count": 2
-  },
-  {
-    "developer": "hongbai12",
-    "model_count": 1
-  },
-  {
-    "developer": "hotmailuser",
-    "model_count": 34
-  },
-  {
-    "developer": "HPAI-BSC",
-    "model_count": 3
-  },
-  {
-    "developer": "huawei-noah-ustc",
-    "model_count": 1
-  },
-  {
-    "developer": "HuggingFaceH4",
-    "model_count": 5
-  },
-  {
-    "developer": "HuggingFaceTB",
-    "model_count": 12
-  },
-  {
-    "developer": "huggyllama",
-    "model_count": 3
-  },
-  {
-    "developer": "huihui-ai",
-    "model_count": 8
-  },
-  {
-    "developer": "HumanLLMs",
-    "model_count": 3
-  },
-  {
-    "developer": "huu-ontocord",
-    "model_count": 1
-  },
-  {
-    "developer": "ibivibiv",
-    "model_count": 2
-  },
-  {
-    "developer": "ibm",
-    "model_count": 8
-  },
-  {
-    "developer": "ibm-granite",
-    "model_count": 20
-  },
-  {
-    "developer": "icefog72",
-    "model_count": 62
-  },
-  {
-    "developer": "IDEA-CCNL",
-    "model_count": 2
-  },
-  {
-    "developer": "ifable",
-    "model_count": 1
-  },
-  {
-    "developer": "iFaz",
-    "model_count": 8
-  },
-  {
-    "developer": "ilsp",
-    "model_count": 1
-  },
-  {
-    "developer": "IlyaGusev",
-    "model_count": 2
-  },
-  {
-    "developer": "Infinirc",
-    "model_count": 1
-  },
-  {
-    "developer": "inflatebot",
-    "model_count": 1
-  },
-  {
-    "developer": "infly",
-    "model_count": 1
-  },
-  {
-    "developer": "informatiker",
-    "model_count": 1
-  },
-  {
-    "developer": "INSAIT-Institute",
-    "model_count": 1
-  },
-  {
-    "developer": "insightfactory",
-    "model_count": 1
-  },
-  {
-    "developer": "instruction-pretrain",
-    "model_count": 1
-  },
-  {
-    "developer": "Intel",
-    "model_count": 4
-  },
-  {
-    "developer": "internlm",
-    "model_count": 9
-  },
-  {
-    "developer": "intervitens",
-    "model_count": 1
-  },
-  {
-    "developer": "IntervitensInc",
-    "model_count": 1
-  },
-  {
-    "developer": "inumulaisk",
-    "model_count": 1
-  },
-  {
-    "developer": "invalid-coder",
-    "model_count": 1
-  },
-  {
-    "developer": "Invalid-Null",
-    "model_count": 2
-  },
-  {
-    "developer": "invisietch",
-    "model_count": 4
-  },
-  {
-    "developer": "irahulpandey",
-    "model_count": 1
-  },
-  {
-    "developer": "iRyanBell",
-    "model_count": 2
-  },
-  {
-    "developer": "Isaak-Carter",
-    "model_count": 3
-  },
-  {
-    "developer": "J-LAB",
-    "model_count": 1
-  },
-  {
-    "developer": "JackFram",
-    "model_count": 2
-  },
-  {
-    "developer": "Jacoby746",
-    "model_count": 7
-  },
-  {
-    "developer": "jaredjoss",
-    "model_count": 1
-  },
-  {
-    "developer": "jaspionjader",
-    "model_count": 196
-  },
-  {
-    "developer": "jayasuryajsk",
-    "model_count": 1
-  },
-  {
-    "developer": "JayHyeon",
-    "model_count": 174
-  },
-  {
-    "developer": "jeanmichela",
-    "model_count": 1
-  },
-  {
-    "developer": "jebcarter",
-    "model_count": 1
-  },
-  {
-    "developer": "jebish7",
-    "model_count": 9
-  },
-  {
-    "developer": "jeffmeloy",
-    "model_count": 18
-  },
-  {
-    "developer": "jeonsworld",
-    "model_count": 1
-  },
-  {
-    "developer": "jiangxinyang-shanda",
-    "model_count": 1
-  },
-  {
-    "developer": "jieliu",
-    "model_count": 1
-  },
-  {
-    "developer": "Jimmy19991222",
-    "model_count": 8
-  },
-  {
-    "developer": "jiviai",
-    "model_count": 1
-  },
-  {
-    "developer": "jlzhou",
-    "model_count": 1
-  },
-  {
-    "developer": "johnsutor",
-    "model_count": 31
-  },
-  {
-    "developer": "jondurbin",
-    "model_count": 1
-  },
-  {
-    "developer": "Joseph717171",
-    "model_count": 2
-  },
-  {
-    "developer": "Josephgflowers",
-    "model_count": 7
-  },
-  {
-    "developer": "jpacifico",
-    "model_count": 18
-  },
-  {
-    "developer": "jsfs11",
-    "model_count": 3
-  },
-  {
-    "developer": "JungZoona",
-    "model_count": 2
-  },
-  {
-    "developer": "Junhoee",
-    "model_count": 1
-  },
-  {
-    "developer": "kaist-ai",
-    "model_count": 4
-  },
-  {
-    "developer": "katanemo",
-    "model_count": 3
-  },
-  {
-    "developer": "kavonalds",
-    "model_count": 3
-  },
-  {
-    "developer": "kayfour",
-    "model_count": 1
-  },
-  {
-    "developer": "keeeeenw",
-    "model_count": 1
-  },
-  {
-    "developer": "kekmodel",
-    "model_count": 1
-  },
-  {
-    "developer": "kevin009",
-    "model_count": 1
-  },
-  {
-    "developer": "Khetterman",
-    "model_count": 2
-  },
-  {
-    "developer": "khoantap",
-    "model_count": 9
-  },
-  {
-    "developer": "khulaifi95",
-    "model_count": 1
-  },
-  {
-    "developer": "Kimargin",
-    "model_count": 1
-  },
-  {
-    "developer": "Kimi",
-    "model_count": 1
-  },
-  {
-    "developer": "KingNish",
-    "model_count": 7
-  },
-  {
-    "developer": "kms7530",
-    "model_count": 4
-  },
-  {
-    "developer": "kno10",
-    "model_count": 2
-  },
-  {
-    "developer": "Kquant03",
-    "model_count": 2
-  },
-  {
-    "developer": "Krystalan",
-    "model_count": 2
-  },
-  {
-    "developer": "KSU-HW-SEC",
-    "model_count": 4
-  },
-  {
-    "developer": "Kuaishou",
-    "model_count": 1
-  },
-  {
-    "developer": "Kukedlc",
-    "model_count": 7
-  },
-  {
-    "developer": "Kumar955",
-    "model_count": 1
-  },
-  {
-    "developer": "kyutai",
-    "model_count": 1
-  },
-  {
-    "developer": "kz919",
-    "model_count": 1
-  },
-  {
-    "developer": "L-RAGE",
-    "model_count": 1
-  },
-  {
-    "developer": "ladydaina",
-    "model_count": 1
-  },
-  {
-    "developer": "laislemke",
-    "model_count": 1
-  },
-  {
-    "developer": "lalainy",
-    "model_count": 7
-  },
-  {
-    "developer": "Lambent",
-    "model_count": 1
-  },
-  {
-    "developer": "Langboat",
-    "model_count": 1
-  },
-  {
-    "developer": "langgptai",
-    "model_count": 2
-  },
-  {
-    "developer": "lars1234",
-    "model_count": 1
-  },
-  {
-    "developer": "Lawnakk",
-    "model_count": 10
-  },
-  {
-    "developer": "leafspark",
-    "model_count": 1
-  },
-  {
-    "developer": "LEESM",
-    "model_count": 4
-  },
-  {
-    "developer": "lemon07r",
-    "model_count": 17
-  },
-  {
-    "developer": "LenguajeNaturalAI",
-    "model_count": 2
-  },
-  {
-    "developer": "LeroyDyer",
-    "model_count": 58
-  },
-  {
-    "developer": "lesubra",
-    "model_count": 8
-  },
-  {
-    "developer": "LGAI-EXAONE",
-    "model_count": 4
-  },
-  {
-    "developer": "lightblue",
-    "model_count": 5
-  },
-  {
-    "developer": "LightningRodLabs",
-    "model_count": 3
-  },
-  {
-    "developer": "Lil-R",
-    "model_count": 8
-  },
-  {
-    "developer": "LilRg",
-    "model_count": 10
-  },
-  {
-    "developer": "LimYeri",
-    "model_count": 5
-  },
-  {
-    "developer": "lkoenig",
-    "model_count": 11
-  },
-  {
-    "developer": "llm-blender",
-    "model_count": 1
-  },
-  {
-    "developer": "LLM360",
-    "model_count": 2
-  },
-  {
-    "developer": "LLM4Binary",
-    "model_count": 1
-  },
-  {
-    "developer": "llmat",
-    "model_count": 1
-  },
-  {
-    "developer": "llnYou",
-    "model_count": 5
-  },
-  {
-    "developer": "lmsys",
-    "model_count": 5
-  },
-  {
-    "developer": "Locutusque",
-    "model_count": 6
-  },
-  {
-    "developer": "lodrick-the-lafted",
-    "model_count": 1
-  },
-  {
-    "developer": "lordjia",
-    "model_count": 2
-  },
-  {
-    "developer": "lt-asset",
-    "model_count": 1
-  },
-  {
-    "developer": "lunahr",
-    "model_count": 2
-  },
-  {
-    "developer": "Luni",
-    "model_count": 2
-  },
-  {
-    "developer": "Lunzima",
-    "model_count": 18
-  },
-  {
-    "developer": "LxzGordon",
-    "model_count": 2
-  },
-  {
-    "developer": "Lyte",
-    "model_count": 3
-  },
-  {
-    "developer": "M4-ai",
-    "model_count": 1
-  },
-  {
-    "developer": "m42-health",
-    "model_count": 1
-  },
-  {
-    "developer": "macadeliccc",
-    "model_count": 3
-  },
-  {
-    "developer": "madeagents",
-    "model_count": 4
-  },
-  {
-    "developer": "magnifi",
-    "model_count": 1
-  },
-  {
-    "developer": "Magpie-Align",
-    "model_count": 8
-  },
-  {
-    "developer": "MagusCorp",
-    "model_count": 1
-  },
-  {
-    "developer": "maldv",
-    "model_count": 7
-  },
-  {
-    "developer": "ManoloPueblo",
-    "model_count": 3
-  },
-  {
-    "developer": "marcuscedricridia",
-    "model_count": 40
-  },
-  {
-    "developer": "marin-community",
-    "model_count": 1
-  },
-  {
-    "developer": "MarinaraSpaghetti",
-    "model_count": 2
-  },
-  {
-    "developer": "Marsouuu",
-    "model_count": 8
-  },
-  {
-    "developer": "matouLeLoup",
-    "model_count": 5
-  },
-  {
-    "developer": "mattshumer",
-    "model_count": 3
-  },
-  {
-    "developer": "maywell",
-    "model_count": 1
-  },
-  {
-    "developer": "MaziyarPanahi",
-    "model_count": 44
-  },
-  {
-    "developer": "meditsolutions",
-    "model_count": 12
-  },
-  {
-    "developer": "meetkai",
-    "model_count": 1
-  },
-  {
-    "developer": "meraGPT",
-    "model_count": 1
-  },
-  {
-    "developer": "mergekit-community",
-    "model_count": 11
-  },
-  {
-    "developer": "MEscriva",
-    "model_count": 1
-  },
-  {
-    "developer": "Meta",
-    "model_count": 26
-  },
-  {
-    "developer": "meta-llama",
-    "model_count": 23
-  },
-  {
-    "developer": "meta-metrics",
-    "model_count": 1
-  },
-  {
-    "developer": "mhl1",
-    "model_count": 1
-  },
-  {
-    "developer": "microsoft",
-    "model_count": 19
-  },
-  {
-    "developer": "mightbe",
-    "model_count": 1
-  },
-  {
-    "developer": "migtissera",
-    "model_count": 8
-  },
-  {
-    "developer": "Minami-su",
-    "model_count": 5
-  },
-  {
-    "developer": "mindw96",
-    "model_count": 1
-  },
-  {
-    "developer": "minghaowu",
-    "model_count": 1
-  },
-  {
-    "developer": "MiniMax",
-    "model_count": 4
-  },
-  {
-    "developer": "ministral",
-    "model_count": 1
-  },
-  {
-    "developer": "mistral-community",
-    "model_count": 3
-  },
-  {
-    "developer": "mistralai",
-    "model_count": 36
-  },
-  {
-    "developer": "mixtao",
-    "model_count": 1
-  },
-  {
-    "developer": "mkurman",
-    "model_count": 3
-  },
-  {
-    "developer": "mkxu",
-    "model_count": 2
-  },
-  {
-    "developer": "mlabonne",
-    "model_count": 14
-  },
-  {
-    "developer": "MLP-KTLim",
-    "model_count": 1
-  },
-  {
-    "developer": "mlx-community",
-    "model_count": 2
-  },
-  {
-    "developer": "mmnga",
-    "model_count": 1
-  },
-  {
-    "developer": "mobiuslabsgmbh",
-    "model_count": 2
-  },
-  {
-    "developer": "ModelCloud",
-    "model_count": 1
-  },
-  {
-    "developer": "ModelSpace",
-    "model_count": 1
-  },
-  {
-    "developer": "moeru-ai",
-    "model_count": 3
-  },
-  {
-    "developer": "monsterapi",
-    "model_count": 2
-  },
-  {
-    "developer": "MoonRide",
-    "model_count": 1
-  },
-  {
-    "developer": "moonshot",
-    "model_count": 2
-  },
-  {
-    "developer": "Moonshot AI",
-    "model_count": 2
-  },
-  {
-    "developer": "moonshotai",
-    "model_count": 2
-  },
-  {
-    "developer": "mosaicml",
-    "model_count": 3
-  },
-  {
-    "developer": "mosama",
-    "model_count": 1
-  },
-  {
-    "developer": "Mostafa8Mehrabi",
-    "model_count": 1
-  },
-  {
-    "developer": "mrdayl",
-    "model_count": 5
-  },
-  {
-    "developer": "mrm8488",
-    "model_count": 2
-  },
-  {
-    "developer": "MrRobotoAI",
-    "model_count": 2
-  },
-  {
-    "developer": "MTSAIR",
-    "model_count": 2
-  },
-  {
-    "developer": "mukaj",
-    "model_count": 1
-  },
-  {
-    "developer": "Multiple",
-    "model_count": 1
-  },
-  {
-    "developer": "MultivexAI",
-    "model_count": 5
-  },
-  {
-    "developer": "Mxode",
-    "model_count": 5
-  },
-  {
-    "developer": "my_model",
-    "model_count": 1
-  },
-  {
-    "developer": "nanbeige",
-    "model_count": 2
-  },
-  {
-    "developer": "NAPS-ai",
-    "model_count": 7
-  },
-  {
-    "developer": "natong19",
-    "model_count": 2
-  },
-  {
-    "developer": "Naveenpoliasetty",
-    "model_count": 1
-  },
-  {
-    "developer": "nazimali",
-    "model_count": 2
-  },
-  {
-    "developer": "NbAiLab",
-    "model_count": 2
-  },
-  {
-    "developer": "nbeerbower",
-    "model_count": 51
-  },
-  {
-    "developer": "nbrahme",
-    "model_count": 1
-  },
-  {
-    "developer": "NCSOFT",
-    "model_count": 3
-  },
-  {
-    "developer": "necva",
-    "model_count": 2
-  },
-  {
-    "developer": "Nekochu",
-    "model_count": 4
-  },
-  {
-    "developer": "neopolita",
-    "model_count": 11
-  },
-  {
-    "developer": "netcat420",
-    "model_count": 48
-  },
-  {
-    "developer": "netease-youdao",
-    "model_count": 1
-  },
-  {
-    "developer": "NeverSleep",
-    "model_count": 2
-  },
-  {
-    "developer": "newsbang",
-    "model_count": 7
-  },
-  {
-    "developer": "Nexesenex",
-    "model_count": 44
-  },
-  {
-    "developer": "Nexusflow",
-    "model_count": 2
-  },
-  {
-    "developer": "nguyentd",
-    "model_count": 1
-  },
-  {
-    "developer": "ngxson",
-    "model_count": 2
-  },
-  {
-    "developer": "nhyha",
-    "model_count": 5
-  },
-  {
-    "developer": "nicolinho",
-    "model_count": 4
-  },
-  {
-    "developer": "nidum",
-    "model_count": 1
-  },
-  {
-    "developer": "NikolaSigmoid",
-    "model_count": 7
-  },
-  {
-    "developer": "nisten",
-    "model_count": 2
-  },
-  {
-    "developer": "Nitral-AI",
-    "model_count": 8
-  },
-  {
-    "developer": "NJS26",
-    "model_count": 1
-  },
-  {
-    "developer": "NLPark",
-    "model_count": 3
-  },
-  {
-    "developer": "nlpguy",
-    "model_count": 9
-  },
-  {
-    "developer": "Nohobby",
-    "model_count": 2
-  },
-  {
-    "developer": "noname0202",
-    "model_count": 8
-  },
-  {
-    "developer": "Norquinal",
-    "model_count": 8
-  },
-  {
-    "developer": "NotASI",
-    "model_count": 4
-  },
-  {
-    "developer": "notbdq",
-    "model_count": 1
-  },
-  {
-    "developer": "nothingiisreal",
-    "model_count": 3
-  },
-  {
-    "developer": "NousResearch",
-    "model_count": 19
-  },
-  {
-    "developer": "Novaciano",
-    "model_count": 11
-  },
-  {
-    "developer": "NTQAI",
-    "model_count": 2
-  },
-  {
-    "developer": "NucleusAI",
-    "model_count": 1
-  },
-  {
-    "developer": "nvidia",
-    "model_count": 22
-  },
-  {
-    "developer": "nxmwxm",
-    "model_count": 1
-  },
-  {
-    "developer": "NYTK",
-    "model_count": 2
-  },
-  {
-    "developer": "NyxKrage",
-    "model_count": 1
-  },
-  {
-    "developer": "occiglot",
-    "model_count": 1
-  },
-  {
-    "developer": "odyssey-labs",
-    "model_count": 1
-  },
-  {
-    "developer": "OEvortex",
-    "model_count": 5
-  },
-  {
-    "developer": "olabs-ai",
-    "model_count": 1
-  },
-  {
-    "developer": "OliveiraJLT",
-    "model_count": 1
-  },
-  {
-    "developer": "Omkar1102",
-    "model_count": 1
-  },
-  {
-    "developer": "OmnicromsBrain",
-    "model_count": 1
-  },
-  {
-    "developer": "OnlyCheeini",
-    "model_count": 1
-  },
-  {
-    "developer": "ontocord",
-    "model_count": 32
-  },
-  {
-    "developer": "oobabooga",
-    "model_count": 1
-  },
-  {
-    "developer": "oopere",
-    "model_count": 9
-  },
-  {
-    "developer": "open-atlas",
-    "model_count": 2
-  },
-  {
-    "developer": "open-neo",
-    "model_count": 2
-  },
-  {
-    "developer": "Open-Orca",
-    "model_count": 1
-  },
-  {
-    "developer": "open-thoughts",
-    "model_count": 1
-  },
-  {
-    "developer": "OpenAI",
-    "model_count": 75
-  },
-  {
-    "developer": "openai-community",
-    "model_count": 4
-  },
-  {
-    "developer": "OpenAssistant",
-    "model_count": 4
-  },
-  {
-    "developer": "openbmb",
-    "model_count": 7
-  },
-  {
-    "developer": "OpenBuddy",
-    "model_count": 22
-  },
-  {
-    "developer": "openchat",
-    "model_count": 6
-  },
-  {
-    "developer": "opencompass",
-    "model_count": 4
-  },
-  {
-    "developer": "OpenGenerativeAI",
-    "model_count": 2
-  },
-  {
-    "developer": "OpenLeecher",
-    "model_count": 1
-  },
-  {
-    "developer": "OpenLLM-France",
-    "model_count": 4
-  },
-  {
-    "developer": "OpenScholar",
-    "model_count": 1
-  },
-  {
-    "developer": "orai-nlp",
-    "model_count": 1
-  },
-  {
-    "developer": "Orenguteng",
-    "model_count": 2
-  },
-  {
-    "developer": "Orion-zhen",
-    "model_count": 2
-  },
-  {
-    "developer": "oxyapi",
-    "model_count": 1
-  },
-  {
-    "developer": "ozone-ai",
-    "model_count": 1
-  },
-  {
-    "developer": "ozone-research",
-    "model_count": 1
-  },
-  {
-    "developer": "P0x0",
-    "model_count": 1
-  },
-  {
-    "developer": "paloalma",
-    "model_count": 5
-  },
-  {
-    "developer": "pankajmathur",
-    "model_count": 29
-  },
-  {
-    "developer": "Parissa3",
-    "model_count": 1
-  },
-  {
-    "developer": "paulml",
-    "model_count": 1
-  },
-  {
-    "developer": "phronetic-ai",
-    "model_count": 1
-  },
-  {
-    "developer": "Pinkstack",
-    "model_count": 4
-  },
-  {
-    "developer": "pints-ai",
-    "model_count": 2
-  },
-  {
-    "developer": "piotr25691",
-    "model_count": 3
-  },
-  {
-    "developer": "PJMixers",
-    "model_count": 1
-  },
-  {
-    "developer": "PJMixers-Dev",
-    "model_count": 9
-  },
-  {
-    "developer": "PKU-Alignment",
-    "model_count": 4
-  },
-  {
-    "developer": "PocketDoc",
-    "model_count": 5
-  },
-  {
-    "developer": "PoLL",
-    "model_count": 1
-  },
-  {
-    "developer": "postbot",
-    "model_count": 1
-  },
-  {
-    "developer": "PowerInfer",
-    "model_count": 1
-  },
-  {
-    "developer": "PranavHarshan",
-    "model_count": 2
-  },
-  {
-    "developer": "Pretergeek",
-    "model_count": 9
-  },
-  {
-    "developer": "PrimeIntellect",
-    "model_count": 2
-  },
-  {
-    "developer": "prince-canuma",
-    "model_count": 1
-  },
-  {
-    "developer": "princeton-nlp",
-    "model_count": 51
-  },
-  {
-    "developer": "prithivMLmods",
-    "model_count": 110
-  },
-  {
-    "developer": "prometheus-eval",
-    "model_count": 2
-  },
-  {
-    "developer": "pszemraj",
-    "model_count": 2
-  },
-  {
-    "developer": "PuxAI",
-    "model_count": 1
-  },
-  {
-    "developer": "PygmalionAI",
-    "model_count": 1
-  },
-  {
-    "developer": "Q-bert",
-    "model_count": 1
-  },
-  {
-    "developer": "qingy2019",
-    "model_count": 7
-  },
-  {
-    "developer": "qingy2024",
-    "model_count": 17
-  },
-  {
-    "developer": "qq8933",
-    "model_count": 1
-  },
-  {
-    "developer": "Quazim0t0",
-    "model_count": 69
-  },
-  {
-    "developer": "Qwen",
-    "model_count": 80
-  },
-  {
-    "developer": "R-I-S-E",
-    "model_count": 2
-  },
-  {
-    "developer": "Rakuten",
-    "model_count": 3
-  },
-  {
-    "developer": "raphgg",
-    "model_count": 1
-  },
-  {
-    "developer": "rasyosef",
-    "model_count": 4
-  },
-  {
-    "developer": "Ray2333",
-    "model_count": 10
-  },
-  {
-    "developer": "RDson",
-    "model_count": 1
-  },
-  {
-    "developer": "realtreetune",
-    "model_count": 1
-  },
-  {
-    "developer": "recoilme",
-    "model_count": 6
-  },
-  {
-    "developer": "redrix",
-    "model_count": 2
-  },
-  {
-    "developer": "refuelai",
-    "model_count": 1
-  },
-  {
-    "developer": "Replete-AI",
-    "model_count": 9
-  },
-  {
-    "developer": "RESMPDEV",
-    "model_count": 2
-  },
-  {
-    "developer": "RezVortex",
-    "model_count": 2
-  },
-  {
-    "developer": "rhplus0831",
-    "model_count": 1
-  },
-  {
-    "developer": "rhymes-ai",
-    "model_count": 1
-  },
-  {
-    "developer": "rhysjones",
-    "model_count": 1
-  },
-  {
-    "developer": "riaz",
-    "model_count": 1
-  },
-  {
-    "developer": "RLHFlow",
-    "model_count": 4
-  },
-  {
-    "developer": "rmdhirr",
-    "model_count": 1
-  },
-  {
-    "developer": "Ro-xe",
-    "model_count": 4
-  },
-  {
-    "developer": "Rombo-Org",
-    "model_count": 1
-  },
-  {
-    "developer": "rombodawg",
-    "model_count": 14
-  },
-  {
-    "developer": "rootxhacker",
-    "model_count": 3
-  },
-  {
-    "developer": "rsh345",
-    "model_count": 1
-  },
-  {
-    "developer": "rubenroy",
-    "model_count": 3
-  },
-  {
-    "developer": "RubielLabarta",
-    "model_count": 1
-  },
-  {
-    "developer": "ruizhe1217",
-    "model_count": 1
-  },
-  {
-    "developer": "rwitz",
-    "model_count": 1
-  },
-  {
-    "developer": "RWKV",
-    "model_count": 1
-  },
-  {
-    "developer": "sabersaleh",
-    "model_count": 7
-  },
-  {
-    "developer": "sabersalehk",
-    "model_count": 4
-  },
-  {
-    "developer": "SaisExperiments",
-    "model_count": 6
-  },
-  {
-    "developer": "saishf",
-    "model_count": 2
-  },
-  {
-    "developer": "saishshinde15",
-    "model_count": 3
-  },
-  {
-    "developer": "sakaltcommunity",
-    "model_count": 2
-  },
-  {
-    "developer": "Sakalti",
-    "model_count": 66
-  },
-  {
-    "developer": "sakhan10",
-    "model_count": 1
-  },
-  {
-    "developer": "salesforce",
-    "model_count": 9
-  },
-  {
-    "developer": "saltlux",
-    "model_count": 2
-  },
-  {
-    "developer": "sam-paech",
-    "model_count": 3
-  },
-  {
-    "developer": "SanjiWatsuki",
-    "model_count": 2
-  },
-  {
-    "developer": "Sao10K",
-    "model_count": 8
-  },
-  {
-    "developer": "sarvamai",
-    "model_count": 1
-  },
-  {
-    "developer": "Saxo",
-    "model_count": 11
-  },
-  {
-    "developer": "schnapss",
-    "model_count": 1
-  },
-  {
-    "developer": "Schrieffer",
-    "model_count": 1
-  },
-  {
-    "developer": "sci-m-wang",
-    "model_count": 3
-  },
-  {
-    "developer": "SeaLLMs",
-    "model_count": 3
-  },
-  {
-    "developer": "securin",
-    "model_count": 1
-  },
-  {
-    "developer": "senseable",
-    "model_count": 1
-  },
-  {
-    "developer": "SenseLLM",
-    "model_count": 2
-  },
-  {
-    "developer": "SentientAGI",
-    "model_count": 2
-  },
-  {
-    "developer": "SeppeV",
-    "model_count": 1
-  },
-  {
-    "developer": "sequelbox",
-    "model_count": 6
-  },
-  {
-    "developer": "sethuiyer",
-    "model_count": 6
-  },
-  {
-    "developer": "SF-Foundation",
-    "model_count": 2
-  },
-  {
-    "developer": "sfairXC",
-    "model_count": 1
-  },
-  {
-    "developer": "shadowml",
-    "model_count": 2
-  },
-  {
-    "developer": "Sharathhebbar24",
-    "model_count": 2
-  },
-  {
-    "developer": "shastraai",
-    "model_count": 1
-  },
-  {
-    "developer": "ShikaiChen",
-    "model_count": 1
-  },
-  {
-    "developer": "shivam9980",
-    "model_count": 2
-  },
-  {
-    "developer": "shivank21",
-    "model_count": 1
-  },
-  {
-    "developer": "Shreyash2010",
-    "model_count": 1
-  },
-  {
-    "developer": "shuttleai",
-    "model_count": 1
-  },
-  {
-    "developer": "shyamieee",
-    "model_count": 1
-  },
-  {
-    "developer": "Sicarius-Prototyping",
-    "model_count": 3
-  },
-  {
-    "developer": "SicariusSicariiStuff",
-    "model_count": 19
-  },
-  {
-    "developer": "silma-ai",
-    "model_count": 2
-  },
-  {
-    "developer": "siqi00",
-    "model_count": 2
-  },
-  {
-    "developer": "skumar9",
-    "model_count": 1
-  },
-  {
-    "developer": "skymizer",
-    "model_count": 1
-  },
-  {
-    "developer": "SkyOrbis",
-    "model_count": 12
-  },
-  {
-    "developer": "Skywork",
-    "model_count": 15
-  },
-  {
-    "developer": "snowflake",
-    "model_count": 1
-  },
-  {
-    "developer": "Solshine",
-    "model_count": 2
-  },
-  {
-    "developer": "someon98",
-    "model_count": 1
-  },
-  {
-    "developer": "sometimesanotion",
-    "model_count": 58
-  },
-  {
-    "developer": "sonthenguyen",
-    "model_count": 6
-  },
-  {
-    "developer": "sophosympatheia",
-    "model_count": 1
-  },
-  {
-    "developer": "Sorawiz",
-    "model_count": 2
-  },
-  {
-    "developer": "Sourjayon",
-    "model_count": 2
-  },
-  {
-    "developer": "SpaceYL",
-    "model_count": 1
-  },
-  {
-    "developer": "speakleash",
-    "model_count": 5
-  },
-  {
-    "developer": "speakleash-ack-cyfronet-agh",
-    "model_count": 1
-  },
-  {
-    "developer": "Spestly",
-    "model_count": 3
-  },
-  {
-    "developer": "spmurrayzzz",
-    "model_count": 1
-  },
-  {
-    "developer": "spow12",
-    "model_count": 4
-  },
-  {
-    "developer": "ssmits",
-    "model_count": 1
-  },
-  {
-    "developer": "stabilityai",
-    "model_count": 9
-  },
-  {
-    "developer": "stanford",
-    "model_count": 1
-  },
-  {
-    "developer": "stanfordnlp",
-    "model_count": 2
-  },
-  {
-    "developer": "Stark2008",
-    "model_count": 3
-  },
-  {
-    "developer": "Steelskull",
-    "model_count": 2
-  },
-  {
-    "developer": "StelleX",
-    "model_count": 2
-  },
-  {
-    "developer": "sthenno",
-    "model_count": 9
-  },
-  {
-    "developer": "sthenno-com",
-    "model_count": 4
-  },
-  {
-    "developer": "streamerbtw1002",
-    "model_count": 1
-  },
-  {
-    "developer": "stupidity-ai",
-    "model_count": 1
-  },
-  {
-    "developer": "suayptalha",
-    "model_count": 12
-  },
-  {
-    "developer": "SultanR",
-    "model_count": 4
-  },
-  {
-    "developer": "sumink",
-    "model_count": 22
-  },
-  {
-    "developer": "sunbaby",
-    "model_count": 1
-  },
-  {
-    "developer": "Supichi",
-    "model_count": 11
-  },
-  {
-    "developer": "Svak",
-    "model_count": 2
-  },
-  {
-    "developer": "swap-uniba",
-    "model_count": 1
-  },
-  {
-    "developer": "Syed-Hasan-8503",
-    "model_count": 1
-  },
-  {
-    "developer": "synergetic",
-    "model_count": 1
-  },
-  {
-    "developer": "T145",
-    "model_count": 51
-  },
-  {
-    "developer": "talha2001",
-    "model_count": 1
-  },
-  {
-    "developer": "tangledgroup",
-    "model_count": 2
-  },
-  {
-    "developer": "tanliboy",
-    "model_count": 3
-  },
-  {
-    "developer": "tannedbum",
-    "model_count": 4
-  },
-  {
-    "developer": "Tarek07",
-    "model_count": 2
-  },
-  {
-    "developer": "TeeZee",
-    "model_count": 1
-  },
-  {
-    "developer": "teknium",
-    "model_count": 5
-  },
-  {
-    "developer": "Telugu-LLM-Labs",
-    "model_count": 2
-  },
-  {
-    "developer": "TencentARC",
-    "model_count": 4
-  },
-  {
-    "developer": "tensopolis",
-    "model_count": 15
-  },
-  {
-    "developer": "tensoropera",
-    "model_count": 1
-  },
-  {
-    "developer": "tenyx",
-    "model_count": 1
-  },
-  {
-    "developer": "TheDrummer",
-    "model_count": 9
-  },
-  {
-    "developer": "TheDrunkenSnail",
-    "model_count": 3
-  },
-  {
-    "developer": "TheHierophant",
-    "model_count": 1
-  },
-  {
-    "developer": "theo77186",
-    "model_count": 1
-  },
-  {
-    "developer": "theprint",
-    "model_count": 18
-  },
-  {
-    "developer": "TheTsar1209",
-    "model_count": 7
-  },
-  {
-    "developer": "thinkcoder",
-    "model_count": 1
-  },
-  {
-    "developer": "thirdeyeai",
-    "model_count": 1
-  },
-  {
-    "developer": "thomas-yanxin",
-    "model_count": 4
-  },
-  {
-    "developer": "THUDM",
-    "model_count": 5
-  },
-  {
-    "developer": "tianyil1",
-    "model_count": 1
-  },
-  {
-    "developer": "TIGER-Lab",
-    "model_count": 6
-  },
-  {
-    "developer": "tii-uae",
-    "model_count": 4
-  },
-  {
-    "developer": "tiiuae",
-    "model_count": 18
-  },
-  {
-    "developer": "Tijmen2",
-    "model_count": 1
-  },
-  {
-    "developer": "tinycompany",
-    "model_count": 15
-  },
-  {
-    "developer": "TinyLlama",
-    "model_count": 6
-  },
-  {
-    "developer": "tklohj",
-    "model_count": 1
-  },
-  {
-    "developer": "ToastyPigeon",
-    "model_count": 1
-  },
-  {
-    "developer": "together",
-    "model_count": 4
-  },
-  {
-    "developer": "togethercomputer",
-    "model_count": 10
-  },
-  {
-    "developer": "tokyotech-llm",
-    "model_count": 1
-  },
-  {
-    "developer": "tomasmcm",
-    "model_count": 1
-  },
-  {
-    "developer": "Trappu",
-    "model_count": 2
-  },
-  {
-    "developer": "Tremontaine",
-    "model_count": 1
-  },
-  {
-    "developer": "Triangle104",
-    "model_count": 61
-  },
-  {
-    "developer": "trthminh1112",
-    "model_count": 1
-  },
-  {
-    "developer": "Tsunami-th",
-    "model_count": 4
-  },
-  {
-    "developer": "TTTXXX01",
-    "model_count": 1
-  },
-  {
-    "developer": "tugstugi",
-    "model_count": 1
-  },
-  {
-    "developer": "UCLA-AGI",
-    "model_count": 10
-  },
-  {
-    "developer": "uiuc-oumi",
-    "model_count": 2
-  },
-  {
-    "developer": "UKzExecution",
-    "model_count": 1
-  },
-  {
-    "developer": "Unbabel",
-    "model_count": 1
-  },
-  {
-    "developer": "Undi95",
-    "model_count": 2
-  },
-  {
-    "developer": "universalml",
-    "model_count": 1
-  },
-  {
-    "developer": "unknown",
-    "model_count": 10
-  },
-  {
-    "developer": "unsloth",
-    "model_count": 6
-  },
-  {
-    "developer": "upstage",
-    "model_count": 4
-  },
-  {
-    "developer": "utkmst",
-    "model_count": 1
-  },
-  {
-    "developer": "uukuguy",
-    "model_count": 7
-  },
-  {
-    "developer": "v000000",
-    "model_count": 6
-  },
-  {
-    "developer": "V3N0M",
-    "model_count": 1
-  },
-  {
-    "developer": "VAGOsolutions",
-    "model_count": 17
-  },
-  {
-    "developer": "ValiantLabs",
-    "model_count": 11
-  },
-  {
-    "developer": "vhab10",
-    "model_count": 3
-  },
-  {
-    "developer": "vicgalle",
-    "model_count": 12
-  },
-  {
-    "developer": "viettelsecurity-ai",
-    "model_count": 1
-  },
-  {
-    "developer": "vihangd",
-    "model_count": 1
-  },
-  {
-    "developer": "Vikhrmodels",
-    "model_count": 2
-  },
-  {
-    "developer": "VIRNECT",
-    "model_count": 2
-  },
-  {
-    "developer": "voidful",
-    "model_count": 1
-  },
-  {
-    "developer": "vonjack",
-    "model_count": 7
-  },
-  {
-    "developer": "w4r10ck",
-    "model_count": 1
-  },
-  {
-    "developer": "wanlige",
-    "model_count": 3
-  },
-  {
-    "developer": "wannaphong",
-    "model_count": 1
-  },
-  {
-    "developer": "waqasali1707",
-    "model_count": 1
-  },
-  {
-    "developer": "wave-on-discord",
-    "model_count": 1
-  },
-  {
-    "developer": "weathermanj",
-    "model_count": 4
-  },
-  {
-    "developer": "wenbopan",
-    "model_count": 1
-  },
-  {
-    "developer": "weqweasdas",
-    "model_count": 5
-  },
-  {
-    "developer": "Weyaxi",
-    "model_count": 8
-  },
-  {
-    "developer": "win10",
-    "model_count": 9
-  },
-  {
-    "developer": "winglian",
-    "model_count": 2
-  },
-  {
-    "developer": "WizardLMTeam",
-    "model_count": 3
-  },
-  {
-    "developer": "Wladastic",
-    "model_count": 1
-  },
-  {
-    "developer": "writer",
-    "model_count": 8
-  },
-  {
-    "developer": "wzhouad",
-    "model_count": 1
-  },
-  {
-    "developer": "x0000001",
-    "model_count": 1
-  },
-  {
-    "developer": "xAI",
-    "model_count": 11
-  },
-  {
-    "developer": "Xclbr7",
-    "model_count": 4
-  },
-  {
-    "developer": "Xiaojian9992024",
-    "model_count": 12
-  },
-  {
-    "developer": "xinchen9",
-    "model_count": 5
-  },
-  {
-    "developer": "Xkev",
-    "model_count": 1
-  },
-  {
-    "developer": "xkp24",
-    "model_count": 8
-  },
-  {
-    "developer": "xMaulana",
-    "model_count": 1
-  },
-  {
-    "developer": "xukp20",
-    "model_count": 8
-  },
-  {
-    "developer": "xwen-team",
-    "model_count": 1
-  },
-  {
-    "developer": "xxx777xxxASD",
-    "model_count": 1
-  },
-  {
-    "developer": "yam-peleg",
-    "model_count": 3
-  },
-  {
-    "developer": "yandex",
-    "model_count": 1
-  },
-  {
-    "developer": "yanng1242",
-    "model_count": 1
-  },
-  {
-    "developer": "Yash21",
-    "model_count": 1
-  },
-  {
-    "developer": "yasserrmd",
-    "model_count": 2
-  },
-  {
-    "developer": "ycros",
-    "model_count": 1
-  },
-  {
-    "developer": "yfzp",
-    "model_count": 8
-  },
-  {
-    "developer": "yifAI",
-    "model_count": 1
-  },
-  {
-    "developer": "ylalain",
-    "model_count": 1
-  },
-  {
-    "developer": "ymcki",
-    "model_count": 11
-  },
-  {
-    "developer": "Youlln",
-    "model_count": 19
-  },
-  {
-    "developer": "YoungPanda",
-    "model_count": 1
-  },
-  {
-    "developer": "YOYO-AI",
-    "model_count": 21
-  },
-  {
-    "developer": "yuchenxie",
-    "model_count": 2
-  },
-  {
-    "developer": "Yuma42",
-    "model_count": 3
-  },
-  {
-    "developer": "yuvraj17",
-    "model_count": 3
-  },
-  {
-    "developer": "Z-AI",
-    "model_count": 2
-  },
-  {
-    "developer": "Z.ai",
-    "model_count": 2
-  },
-  {
-    "developer": "Z1-Coder",
-    "model_count": 1
-  },
-  {
-    "developer": "zai-org",
-    "model_count": 1
-  },
-  {
-    "developer": "zake7749",
-    "model_count": 2
-  },
-  {
-    "developer": "zelk12",
-    "model_count": 78
-  },
-  {
-    "developer": "ZeroXClem",
-    "model_count": 11
-  },
-  {
-    "developer": "zetasepic",
-    "model_count": 2
-  },
-  {
-    "developer": "ZeusLabs",
-    "model_count": 1
-  },
-  {
-    "developer": "ZhangShenao",
-    "model_count": 1
-  },
-  {
-    "developer": "zhengr",
-    "model_count": 1
-  },
-  {
-    "developer": "zhipu",
-    "model_count": 3
-  },
-  {
-    "developer": "zhipu-ai",
-    "model_count": 1
-  },
-  {
-    "developer": "ZHLiu627",
-    "model_count": 2
-  },
-  {
-    "developer": "ZiyiYe",
-    "model_count": 1
-  }
-]

data/developers/0-hero.json DELETED Viewed

@@ -1,47 +0,0 @@
-{
-  "developer": "0-hero",
-  "models": [
-    {
-      "id": "0-hero/Matter-0.1-7B-boost-DPO-preview",
-      "name": "0-hero/Matter-0.1-7B-boost-DPO-preview",
-      "developer": "0-hero",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "reward-bench/Score": 0.7448,
-        "reward-bench/Chat": 0.9106,
-        "reward-bench/Chat Hard": 0.6096,
-        "reward-bench/Safety": 0.7135,
-        "reward-bench/Reasoning": 0.8395,
-        "reward-bench/Prior Sets (0.5 weight)": 0.5566
-      }
-    },
-    {
-      "id": "0-hero/Matter-0.1-7B-DPO-preview",
-      "name": "0-hero/Matter-0.1-7B-DPO-preview",
-      "developer": "0-hero",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "reward-bench/Score": 0.7247,
-        "reward-bench/Chat": 0.8939,
-        "reward-bench/Chat Hard": 0.5768,
-        "reward-bench/Safety": 0.6378,
-        "reward-bench/Reasoning": 0.8854,
-        "reward-bench/Prior Sets (0.5 weight)": 0.5348
-      }
-    },
-    {
-      "id": "0-hero/Matter-0.2-7B-DPO",
-      "name": "Matter-0.2-7B-DPO",
-      "developer": "0-hero",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.3303,
-        "hfopenllm_v2/BBH": 0.3596,
-        "hfopenllm_v2/MATH Level 5": 0.0144,
-        "hfopenllm_v2/GPQA": 0.2592,
-        "hfopenllm_v2/MUSR": 0.3814,
-        "hfopenllm_v2/MMLU-PRO": 0.1164
-      }
-    }
-  ]
-}

data/developers/01-ai.json DELETED Viewed

@@ -1,417 +0,0 @@
-{
-  "developer": "01-ai",
-  "models": [
-    {
-      "id": "01-ai/Yi-1.5-34B",
-      "name": "Yi-1.5-34B",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2841,
-        "hfopenllm_v2/BBH": 0.5976,
-        "hfopenllm_v2/MATH Level 5": 0.1533,
-        "hfopenllm_v2/GPQA": 0.3658,
-        "hfopenllm_v2/MUSR": 0.4236,
-        "hfopenllm_v2/MMLU-PRO": 0.4666
-      }
-    },
-    {
-      "id": "01-ai/Yi-1.5-34B-32K",
-      "name": "Yi-1.5-34B-32K",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.3119,
-        "hfopenllm_v2/BBH": 0.6016,
-        "hfopenllm_v2/MATH Level 5": 0.1541,
-        "hfopenllm_v2/GPQA": 0.3633,
-        "hfopenllm_v2/MUSR": 0.4398,
-        "hfopenllm_v2/MMLU-PRO": 0.4709
-      }
-    },
-    {
-      "id": "01-ai/Yi-1.5-34B-Chat",
-      "name": "Yi-1.5-34B-Chat",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.6067,
-        "hfopenllm_v2/BBH": 0.6084,
-        "hfopenllm_v2/MATH Level 5": 0.2772,
-        "hfopenllm_v2/GPQA": 0.3649,
-        "hfopenllm_v2/MUSR": 0.4282,
-        "hfopenllm_v2/MMLU-PRO": 0.452
-      }
-    },
-    {
-      "id": "01-ai/Yi-1.5-34B-Chat-16K",
-      "name": "Yi-1.5-34B-Chat-16K",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4564,
-        "hfopenllm_v2/BBH": 0.61,
-        "hfopenllm_v2/MATH Level 5": 0.2137,
-        "hfopenllm_v2/GPQA": 0.3381,
-        "hfopenllm_v2/MUSR": 0.4398,
-        "hfopenllm_v2/MMLU-PRO": 0.4545
-      }
-    },
-    {
-      "id": "01-ai/Yi-1.5-6B",
-      "name": "Yi-1.5-6B",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2617,
-        "hfopenllm_v2/BBH": 0.4493,
-        "hfopenllm_v2/MATH Level 5": 0.0665,
-        "hfopenllm_v2/GPQA": 0.3138,
-        "hfopenllm_v2/MUSR": 0.4374,
-        "hfopenllm_v2/MMLU-PRO": 0.3144
-      }
-    },
-    {
-      "id": "01-ai/Yi-1.5-6B-Chat",
-      "name": "Yi-1.5-6B-Chat",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.5145,
-        "hfopenllm_v2/BBH": 0.4571,
-        "hfopenllm_v2/MATH Level 5": 0.1624,
-        "hfopenllm_v2/GPQA": 0.302,
-        "hfopenllm_v2/MUSR": 0.4392,
-        "hfopenllm_v2/MMLU-PRO": 0.3193
-      }
-    },
-    {
-      "id": "01-ai/Yi-1.5-9B",
-      "name": "Yi-1.5-9B",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2936,
-        "hfopenllm_v2/BBH": 0.5143,
-        "hfopenllm_v2/MATH Level 5": 0.114,
-        "hfopenllm_v2/GPQA": 0.3792,
-        "hfopenllm_v2/MUSR": 0.4328,
-        "hfopenllm_v2/MMLU-PRO": 0.3916
-      }
-    },
-    {
-      "id": "01-ai/Yi-1.5-9B-32K",
-      "name": "Yi-1.5-9B-32K",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2303,
-        "hfopenllm_v2/BBH": 0.4963,
-        "hfopenllm_v2/MATH Level 5": 0.108,
-        "hfopenllm_v2/GPQA": 0.3591,
-        "hfopenllm_v2/MUSR": 0.4186,
-        "hfopenllm_v2/MMLU-PRO": 0.3765
-      }
-    },
-    {
-      "id": "01-ai/Yi-1.5-9B-Chat",
-      "name": "Yi-1.5-9B-Chat",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.6046,
-        "hfopenllm_v2/BBH": 0.5559,
-        "hfopenllm_v2/MATH Level 5": 0.2258,
-        "hfopenllm_v2/GPQA": 0.3347,
-        "hfopenllm_v2/MUSR": 0.4259,
-        "hfopenllm_v2/MMLU-PRO": 0.3975
-      }
-    },
-    {
-      "id": "01-ai/Yi-1.5-9B-Chat-16K",
-      "name": "Yi-1.5-9B-Chat-16K",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4214,
-        "hfopenllm_v2/BBH": 0.5153,
-        "hfopenllm_v2/MATH Level 5": 0.1782,
-        "hfopenllm_v2/GPQA": 0.3087,
-        "hfopenllm_v2/MUSR": 0.4099,
-        "hfopenllm_v2/MMLU-PRO": 0.3994
-      }
-    },
-    {
-      "id": "01-ai/yi-34b",
-      "name": "Yi 34B",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_lite/Mean win rate": 0.57,
-        "helm_lite/NarrativeQA": 0.782,
-        "helm_lite/NaturalQuestions (closed-book)": 0.443,
-        "helm_lite/OpenbookQA": 0.92,
-        "helm_lite/MMLU": 0.65,
-        "helm_lite/MATH": 0.375,
-        "helm_lite/GSM8K": 0.648,
-        "helm_lite/LegalBench": 0.618,
-        "helm_lite/MedQA": 0.656,
-        "helm_lite/WMT 2014": 0.172,
-        "helm_mmlu/MMLU All Subjects": 0.762,
-        "helm_mmlu/Abstract Algebra": 0.4,
-        "helm_mmlu/Anatomy": 0.748,
-        "helm_mmlu/College Physics": 0.5,
-        "helm_mmlu/Computer Security": 0.83,
-        "helm_mmlu/Econometrics": 0.588,
-        "helm_mmlu/Global Facts": 0.53,
-        "helm_mmlu/Jurisprudence": 0.898,
-        "helm_mmlu/Philosophy": 0.82,
-        "helm_mmlu/Professional Psychology": 0.835,
-        "helm_mmlu/Us Foreign Policy": 0.91,
-        "helm_mmlu/Astronomy": 0.901,
-        "helm_mmlu/Business Ethics": 0.75,
-        "helm_mmlu/Clinical Knowledge": 0.8,
-        "helm_mmlu/Conceptual Physics": 0.77,
-        "helm_mmlu/Electrical Engineering": 0.779,
-        "helm_mmlu/Elementary Mathematics": 0.656,
-        "helm_mmlu/Formal Logic": 0.548,
-        "helm_mmlu/High School World History": 0.907,
-        "helm_mmlu/Human Sexuality": 0.87,
-        "helm_mmlu/International Law": 0.909,
-        "helm_mmlu/Logical Fallacies": 0.883,
-        "helm_mmlu/Machine Learning": 0.58,
-        "helm_mmlu/Management": 0.893,
-        "helm_mmlu/Marketing": 0.936,
-        "helm_mmlu/Medical Genetics": 0.87,
-        "helm_mmlu/Miscellaneous": 0.902,
-        "helm_mmlu/Moral Scenarios": 0.606,
-        "helm_mmlu/Nutrition": 0.869,
-        "helm_mmlu/Prehistory": 0.877,
-        "helm_mmlu/Public Relations": 0.745,
-        "helm_mmlu/Security Studies": 0.833,
-        "helm_mmlu/Sociology": 0.9,
-        "helm_mmlu/Virology": 0.572,
-        "helm_mmlu/World Religions": 0.877,
-        "helm_mmlu/Mean win rate": 0.315,
-        "hfopenllm_v2/IFEval": 0.3046,
-        "hfopenllm_v2/BBH": 0.5457,
-        "hfopenllm_v2/MATH Level 5": 0.0514,
-        "hfopenllm_v2/GPQA": 0.3666,
-        "hfopenllm_v2/MUSR": 0.4119,
-        "hfopenllm_v2/MMLU-PRO": 0.4412
-      }
-    },
-    {
-      "id": "01-ai/Yi-34B-200K",
-      "name": "Yi-34B-200K",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.1542,
-        "hfopenllm_v2/BBH": 0.5442,
-        "hfopenllm_v2/MATH Level 5": 0.0574,
-        "hfopenllm_v2/GPQA": 0.3565,
-        "hfopenllm_v2/MUSR": 0.3817,
-        "hfopenllm_v2/MMLU-PRO": 0.4535
-      }
-    },
-    {
-      "id": "01-ai/Yi-34B-Chat",
-      "name": "Yi-34B-Chat",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4699,
-        "hfopenllm_v2/BBH": 0.5561,
-        "hfopenllm_v2/MATH Level 5": 0.0627,
-        "hfopenllm_v2/GPQA": 0.3381,
-        "hfopenllm_v2/MUSR": 0.3978,
-        "hfopenllm_v2/MMLU-PRO": 0.4093
-      }
-    },
-    {
-      "id": "01-ai/yi-6b",
-      "name": "Yi 6B",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_lite/Mean win rate": 0.253,
-        "helm_lite/NarrativeQA": 0.702,
-        "helm_lite/NaturalQuestions (closed-book)": 0.31,
-        "helm_lite/OpenbookQA": 0.8,
-        "helm_lite/MMLU": 0.53,
-        "helm_lite/MATH": 0.126,
-        "helm_lite/GSM8K": 0.375,
-        "helm_lite/LegalBench": 0.519,
-        "helm_lite/MedQA": 0.497,
-        "helm_lite/WMT 2014": 0.117,
-        "helm_mmlu/MMLU All Subjects": 0.64,
-        "helm_mmlu/Abstract Algebra": 0.3,
-        "helm_mmlu/Anatomy": 0.6,
-        "helm_mmlu/College Physics": 0.422,
-        "helm_mmlu/Computer Security": 0.73,
-        "helm_mmlu/Econometrics": 0.351,
-        "helm_mmlu/Global Facts": 0.43,
-        "helm_mmlu/Jurisprudence": 0.796,
-        "helm_mmlu/Philosophy": 0.678,
-        "helm_mmlu/Professional Psychology": 0.668,
-        "helm_mmlu/Us Foreign Policy": 0.87,
-        "helm_mmlu/Astronomy": 0.684,
-        "helm_mmlu/Business Ethics": 0.67,
-        "helm_mmlu/Clinical Knowledge": 0.66,
-        "helm_mmlu/Conceptual Physics": 0.621,
-        "helm_mmlu/Electrical Engineering": 0.662,
-        "helm_mmlu/Elementary Mathematics": 0.452,
-        "helm_mmlu/Formal Logic": 0.452,
-        "helm_mmlu/High School World History": 0.785,
-        "helm_mmlu/Human Sexuality": 0.763,
-        "helm_mmlu/International Law": 0.769,
-        "helm_mmlu/Logical Fallacies": 0.779,
-        "helm_mmlu/Machine Learning": 0.411,
-        "helm_mmlu/Management": 0.806,
-        "helm_mmlu/Marketing": 0.893,
-        "helm_mmlu/Medical Genetics": 0.77,
-        "helm_mmlu/Miscellaneous": 0.796,
-        "helm_mmlu/Moral Scenarios": 0.335,
-        "helm_mmlu/Nutrition": 0.739,
-        "helm_mmlu/Prehistory": 0.713,
-        "helm_mmlu/Public Relations": 0.718,
-        "helm_mmlu/Security Studies": 0.735,
-        "helm_mmlu/Sociology": 0.831,
-        "helm_mmlu/Virology": 0.452,
-        "helm_mmlu/World Religions": 0.836,
-        "helm_mmlu/Mean win rate": 0.651,
-        "hfopenllm_v2/IFEval": 0.2893,
-        "hfopenllm_v2/BBH": 0.4309,
-        "hfopenllm_v2/MATH Level 5": 0.0159,
-        "hfopenllm_v2/GPQA": 0.2693,
-        "hfopenllm_v2/MUSR": 0.3937,
-        "hfopenllm_v2/MMLU-PRO": 0.2991
-      }
-    },
-    {
-      "id": "01-ai/Yi-6B-200K",
-      "name": "Yi-6B-200K",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.0843,
-        "hfopenllm_v2/BBH": 0.4289,
-        "hfopenllm_v2/MATH Level 5": 0.0181,
-        "hfopenllm_v2/GPQA": 0.2819,
-        "hfopenllm_v2/MUSR": 0.4587,
-        "hfopenllm_v2/MMLU-PRO": 0.2844
-      }
-    },
-    {
-      "id": "01-ai/Yi-6B-Chat",
-      "name": "Yi-6B-Chat",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.3395,
-        "hfopenllm_v2/BBH": 0.4133,
-        "hfopenllm_v2/MATH Level 5": 0.0136,
-        "hfopenllm_v2/GPQA": 0.2945,
-        "hfopenllm_v2/MUSR": 0.3688,
-        "hfopenllm_v2/MMLU-PRO": 0.3061
-      }
-    },
-    {
-      "id": "01-ai/Yi-9B",
-      "name": "Yi-9B",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2709,
-        "hfopenllm_v2/BBH": 0.494,
-        "hfopenllm_v2/MATH Level 5": 0.0559,
-        "hfopenllm_v2/GPQA": 0.318,
-        "hfopenllm_v2/MUSR": 0.4054,
-        "hfopenllm_v2/MMLU-PRO": 0.3574
-      }
-    },
-    {
-      "id": "01-ai/Yi-9B-200K",
-      "name": "Yi-9B-200K",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2327,
-        "hfopenllm_v2/BBH": 0.4793,
-        "hfopenllm_v2/MATH Level 5": 0.0665,
-        "hfopenllm_v2/GPQA": 0.3154,
-        "hfopenllm_v2/MUSR": 0.4294,
-        "hfopenllm_v2/MMLU-PRO": 0.3622
-      }
-    },
-    {
-      "id": "01-ai/Yi-Coder-9B-Chat",
-      "name": "Yi-Coder-9B-Chat",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4817,
-        "hfopenllm_v2/BBH": 0.4814,
-        "hfopenllm_v2/MATH Level 5": 0.04,
-        "hfopenllm_v2/GPQA": 0.2475,
-        "hfopenllm_v2/MUSR": 0.3992,
-        "hfopenllm_v2/MMLU-PRO": 0.2425
-      }
-    },
-    {
-      "id": "01-ai/yi-large-preview",
-      "name": "Yi Large Preview",
-      "developer": "01-ai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_lite/Mean win rate": 0.471,
-        "helm_lite/NarrativeQA": 0.373,
-        "helm_lite/NaturalQuestions (closed-book)": 0.428,
-        "helm_lite/OpenbookQA": 0.946,
-        "helm_lite/MMLU": 0.712,
-        "helm_lite/MATH": 0.712,
-        "helm_lite/GSM8K": 0.69,
-        "helm_lite/LegalBench": 0.519,
-        "helm_lite/MedQA": 0.66,
-        "helm_lite/WMT 2014": 0.176,
-        "helm_mmlu/MMLU All Subjects": 0.793,
-        "helm_mmlu/Abstract Algebra": 0.6,
-        "helm_mmlu/Anatomy": 0.83,
-        "helm_mmlu/College Physics": 0.569,
-        "helm_mmlu/Computer Security": 0.86,
-        "helm_mmlu/Econometrics": 0.728,
-        "helm_mmlu/Global Facts": 0.52,
-        "helm_mmlu/Jurisprudence": 0.852,
-        "helm_mmlu/Philosophy": 0.842,
-        "helm_mmlu/Professional Psychology": 0.853,
-        "helm_mmlu/Us Foreign Policy": 0.85,
-        "helm_mmlu/Astronomy": 0.914,
-        "helm_mmlu/Business Ethics": 0.8,
-        "helm_mmlu/Clinical Knowledge": 0.857,
-        "helm_mmlu/Conceptual Physics": 0.864,
-        "helm_mmlu/Electrical Engineering": 0.779,
-        "helm_mmlu/Elementary Mathematics": 0.685,
-        "helm_mmlu/Formal Logic": 0.603,
-        "helm_mmlu/High School World History": 0.928,
-        "helm_mmlu/Human Sexuality": 0.901,
-        "helm_mmlu/International Law": 0.917,
-        "helm_mmlu/Logical Fallacies": 0.865,
-        "helm_mmlu/Machine Learning": 0.616,
-        "helm_mmlu/Management": 0.903,
-        "helm_mmlu/Marketing": 0.927,
-        "helm_mmlu/Medical Genetics": 0.83,
-        "helm_mmlu/Miscellaneous": 0.916,
-        "helm_mmlu/Moral Scenarios": 0.831,
-        "helm_mmlu/Nutrition": 0.846,
-        "helm_mmlu/Prehistory": 0.892,
-        "helm_mmlu/Public Relations": 0.827,
-        "helm_mmlu/Security Studies": 0.82,
-        "helm_mmlu/Sociology": 0.881,
-        "helm_mmlu/Virology": 0.59,
-        "helm_mmlu/World Religions": 0.871,
-        "helm_mmlu/Mean win rate": 0.258
-      }
-    }
-  ]
-}

data/developers/1-800-llms.json DELETED Viewed

@@ -1,33 +0,0 @@
-{
-  "developer": "1-800-LLMs",
-  "models": [
-    {
-      "id": "1-800-LLMs/Qwen-2.5-14B-Hindi",
-      "name": "Qwen-2.5-14B-Hindi",
-      "developer": "1-800-LLMs",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.5826,
-        "hfopenllm_v2/BBH": 0.6524,
-        "hfopenllm_v2/MATH Level 5": 0.3331,
-        "hfopenllm_v2/GPQA": 0.3624,
-        "hfopenllm_v2/MUSR": 0.4489,
-        "hfopenllm_v2/MMLU-PRO": 0.5263
-      }
-    },
-    {
-      "id": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct",
-      "name": "Qwen-2.5-14B-Hindi-Custom-Instruct",
-      "developer": "1-800-LLMs",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.3077,
-        "hfopenllm_v2/BBH": 0.6284,
-        "hfopenllm_v2/MATH Level 5": 0.3112,
-        "hfopenllm_v2/GPQA": 0.37,
-        "hfopenllm_v2/MUSR": 0.4491,
-        "hfopenllm_v2/MMLU-PRO": 0.5164
-      }
-    }
-  ]
-}

data/developers/1024m.json DELETED Viewed

@@ -1,33 +0,0 @@
-{
-  "developer": "1024m",
-  "models": [
-    {
-      "id": "1024m/PHI-4-Hindi",
-      "name": "PHI-4-Hindi",
-      "developer": "1024m",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.0082,
-        "hfopenllm_v2/BBH": 0.671,
-        "hfopenllm_v2/MATH Level 5": 0.2334,
-        "hfopenllm_v2/GPQA": 0.3977,
-        "hfopenllm_v2/MUSR": 0.4914,
-        "hfopenllm_v2/MMLU-PRO": 0.5239
-      }
-    },
-    {
-      "id": "1024m/QWEN-14B-B100",
-      "name": "QWEN-14B-B100",
-      "developer": "1024m",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7762,
-        "hfopenllm_v2/BBH": 0.6533,
-        "hfopenllm_v2/MATH Level 5": 0.5438,
-        "hfopenllm_v2/GPQA": 0.3507,
-        "hfopenllm_v2/MUSR": 0.41,
-        "hfopenllm_v2/MMLU-PRO": 0.5179
-      }
-    }
-  ]
-}

data/developers/152334h.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "developer": "152334H",
-  "models": [
-    {
-      "id": "152334H/miqu-1-70b-sf",
-      "name": "miqu-1-70b-sf",
-      "developer": "152334H",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.5182,
-        "hfopenllm_v2/BBH": 0.6102,
-        "hfopenllm_v2/MATH Level 5": 0.1246,
-        "hfopenllm_v2/GPQA": 0.3507,
-        "hfopenllm_v2/MUSR": 0.4582,
-        "hfopenllm_v2/MMLU-PRO": 0.4228
-      }
-    }
-  ]
-}

data/developers/1tuanpham.json DELETED Viewed

@@ -1,33 +0,0 @@
-{
-  "developer": "1TuanPham",
-  "models": [
-    {
-      "id": "1TuanPham/T-VisStar-7B-v0.1",
-      "name": "T-VisStar-7B-v0.1",
-      "developer": "1TuanPham",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.3607,
-        "hfopenllm_v2/BBH": 0.5052,
-        "hfopenllm_v2/MATH Level 5": 0.0574,
-        "hfopenllm_v2/GPQA": 0.2852,
-        "hfopenllm_v2/MUSR": 0.4375,
-        "hfopenllm_v2/MMLU-PRO": 0.3211
-      }
-    },
-    {
-      "id": "1TuanPham/T-VisStar-v0.1",
-      "name": "T-VisStar-v0.1",
-      "developer": "1TuanPham",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.3607,
-        "hfopenllm_v2/BBH": 0.5052,
-        "hfopenllm_v2/MATH Level 5": 0.0574,
-        "hfopenllm_v2/GPQA": 0.2852,
-        "hfopenllm_v2/MUSR": 0.4375,
-        "hfopenllm_v2/MMLU-PRO": 0.3211
-      }
-    }
-  ]
-}

data/developers/3rd-degree-burn.json DELETED Viewed

@@ -1,61 +0,0 @@
-{
-  "developer": "3rd-Degree-Burn",
-  "models": [
-    {
-      "id": "3rd-Degree-Burn/L-3.1-Science-Writer-8B",
-      "name": "L-3.1-Science-Writer-8B",
-      "developer": "3rd-Degree-Burn",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4263,
-        "hfopenllm_v2/BBH": 0.5041,
-        "hfopenllm_v2/MATH Level 5": 0.1035,
-        "hfopenllm_v2/GPQA": 0.2743,
-        "hfopenllm_v2/MUSR": 0.3959,
-        "hfopenllm_v2/MMLU-PRO": 0.3649
-      }
-    },
-    {
-      "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot",
-      "name": "Llama-3.1-8B-Squareroot",
-      "developer": "3rd-Degree-Burn",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2213,
-        "hfopenllm_v2/BBH": 0.3461,
-        "hfopenllm_v2/MATH Level 5": 0.2659,
-        "hfopenllm_v2/GPQA": 0.2567,
-        "hfopenllm_v2/MUSR": 0.3089,
-        "hfopenllm_v2/MMLU-PRO": 0.175
-      }
-    },
-    {
-      "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1",
-      "name": "Llama-3.1-8B-Squareroot-v1",
-      "developer": "3rd-Degree-Burn",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2892,
-        "hfopenllm_v2/BBH": 0.3343,
-        "hfopenllm_v2/MATH Level 5": 0.0884,
-        "hfopenllm_v2/GPQA": 0.2559,
-        "hfopenllm_v2/MUSR": 0.3341,
-        "hfopenllm_v2/MMLU-PRO": 0.1127
-      }
-    },
-    {
-      "id": "3rd-Degree-Burn/Llama-Squared-8B",
-      "name": "Llama-Squared-8B",
-      "developer": "3rd-Degree-Burn",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2755,
-        "hfopenllm_v2/BBH": 0.4431,
-        "hfopenllm_v2/MATH Level 5": 0.0574,
-        "hfopenllm_v2/GPQA": 0.2718,
-        "hfopenllm_v2/MUSR": 0.3089,
-        "hfopenllm_v2/MMLU-PRO": 0.2366
-      }
-    }
-  ]
-}

data/developers/4season.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "developer": "4season",
-  "models": [
-    {
-      "id": "4season/final_model_test_v2",
-      "name": "final_model_test_v2",
-      "developer": "4season",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.3191,
-        "hfopenllm_v2/BBH": 0.6342,
-        "hfopenllm_v2/MATH Level 5": 0.0838,
-        "hfopenllm_v2/GPQA": 0.3272,
-        "hfopenllm_v2/MUSR": 0.4314,
-        "hfopenllm_v2/MMLU-PRO": 0.3528
-      }
-    }
-  ]
-}

data/developers/aaditya.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "developer": "aaditya",
-  "models": [
-    {
-      "id": "aaditya/Llama3-OpenBioLLM-70B",
-      "name": "Llama3-OpenBioLLM-70B",
-      "developer": "aaditya",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7597,
-        "hfopenllm_v2/BBH": 0.6399,
-        "hfopenllm_v2/MATH Level 5": 0.1971,
-        "hfopenllm_v2/GPQA": 0.323,
-        "hfopenllm_v2/MUSR": 0.4417,
-        "hfopenllm_v2/MMLU-PRO": 0.4867
-      }
-    }
-  ]
-}

data/developers/aalf.json DELETED Viewed

@@ -1,61 +0,0 @@
-{
-  "developer": "AALF",
-  "models": [
-    {
-      "id": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview",
-      "name": "FuseChat-Llama-3.1-8B-Instruct-preview",
-      "developer": "AALF",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.719,
-        "hfopenllm_v2/BBH": 0.512,
-        "hfopenllm_v2/MATH Level 5": 0.2477,
-        "hfopenllm_v2/GPQA": 0.3054,
-        "hfopenllm_v2/MUSR": 0.382,
-        "hfopenllm_v2/MMLU-PRO": 0.3733
-      }
-    },
-    {
-      "id": "AALF/FuseChat-Llama-3.1-8B-SFT-preview",
-      "name": "FuseChat-Llama-3.1-8B-SFT-preview",
-      "developer": "AALF",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7281,
-        "hfopenllm_v2/BBH": 0.524,
-        "hfopenllm_v2/MATH Level 5": 0.2251,
-        "hfopenllm_v2/GPQA": 0.3045,
-        "hfopenllm_v2/MUSR": 0.402,
-        "hfopenllm_v2/MMLU-PRO": 0.3743
-      }
-    },
-    {
-      "id": "AALF/gemma-2-27b-it-SimPO-37K",
-      "name": "gemma-2-27b-it-SimPO-37K",
-      "developer": "AALF",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2407,
-        "hfopenllm_v2/BBH": 0.3911,
-        "hfopenllm_v2/MATH Level 5": 0.0128,
-        "hfopenllm_v2/GPQA": 0.2802,
-        "hfopenllm_v2/MUSR": 0.3488,
-        "hfopenllm_v2/MMLU-PRO": 0.1971
-      }
-    },
-    {
-      "id": "AALF/gemma-2-27b-it-SimPO-37K-100steps",
-      "name": "gemma-2-27b-it-SimPO-37K-100steps",
-      "developer": "AALF",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2568,
-        "hfopenllm_v2/BBH": 0.3931,
-        "hfopenllm_v2/MATH Level 5": 0.0211,
-        "hfopenllm_v2/GPQA": 0.2886,
-        "hfopenllm_v2/MUSR": 0.3329,
-        "hfopenllm_v2/MMLU-PRO": 0.2125
-      }
-    }
-  ]
-}

data/developers/aashraf995.json DELETED Viewed

@@ -1,61 +0,0 @@
-{
-  "developer": "Aashraf995",
-  "models": [
-    {
-      "id": "Aashraf995/Creative-7B-nerd",
-      "name": "Creative-7B-nerd",
-      "developer": "Aashraf995",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4722,
-        "hfopenllm_v2/BBH": 0.5607,
-        "hfopenllm_v2/MATH Level 5": 0.3165,
-        "hfopenllm_v2/GPQA": 0.3263,
-        "hfopenllm_v2/MUSR": 0.4515,
-        "hfopenllm_v2/MMLU-PRO": 0.4492
-      }
-    },
-    {
-      "id": "Aashraf995/Gemma-Evo-10B",
-      "name": "Gemma-Evo-10B",
-      "developer": "Aashraf995",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7332,
-        "hfopenllm_v2/BBH": 0.6044,
-        "hfopenllm_v2/MATH Level 5": 0.2228,
-        "hfopenllm_v2/GPQA": 0.354,
-        "hfopenllm_v2/MUSR": 0.4595,
-        "hfopenllm_v2/MMLU-PRO": 0.4275
-      }
-    },
-    {
-      "id": "Aashraf995/Qwen-Evo-7B",
-      "name": "Qwen-Evo-7B",
-      "developer": "Aashraf995",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4757,
-        "hfopenllm_v2/BBH": 0.5709,
-        "hfopenllm_v2/MATH Level 5": 0.3142,
-        "hfopenllm_v2/GPQA": 0.3255,
-        "hfopenllm_v2/MUSR": 0.4541,
-        "hfopenllm_v2/MMLU-PRO": 0.4462
-      }
-    },
-    {
-      "id": "Aashraf995/QwenStock-14B",
-      "name": "QwenStock-14B",
-      "developer": "Aashraf995",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.5009,
-        "hfopenllm_v2/BBH": 0.655,
-        "hfopenllm_v2/MATH Level 5": 0.3573,
-        "hfopenllm_v2/GPQA": 0.3893,
-        "hfopenllm_v2/MUSR": 0.4793,
-        "hfopenllm_v2/MMLU-PRO": 0.5382
-      }
-    }
-  ]
-}

data/developers/abacusai.json DELETED Viewed

@@ -1,145 +0,0 @@
-{
-  "developer": "abacusai",
-  "models": [
-    {
-      "id": "abacusai/bigstral-12b-32k",
-      "name": "bigstral-12b-32k",
-      "developer": "abacusai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4194,
-        "hfopenllm_v2/BBH": 0.47,
-        "hfopenllm_v2/MATH Level 5": 0.0151,
-        "hfopenllm_v2/GPQA": 0.2928,
-        "hfopenllm_v2/MUSR": 0.456,
-        "hfopenllm_v2/MMLU-PRO": 0.2641
-      }
-    },
-    {
-      "id": "abacusai/bigyi-15b",
-      "name": "bigyi-15b",
-      "developer": "abacusai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2094,
-        "hfopenllm_v2/BBH": 0.4345,
-        "hfopenllm_v2/MATH Level 5": 0.0295,
-        "hfopenllm_v2/GPQA": 0.3096,
-        "hfopenllm_v2/MUSR": 0.3538,
-        "hfopenllm_v2/MMLU-PRO": 0.3003
-      }
-    },
-    {
-      "id": "abacusai/Dracarys-72B-Instruct",
-      "name": "Dracarys-72B-Instruct",
-      "developer": "abacusai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7856,
-        "hfopenllm_v2/BBH": 0.6944,
-        "hfopenllm_v2/MATH Level 5": 0.3965,
-        "hfopenllm_v2/GPQA": 0.3909,
-        "hfopenllm_v2/MUSR": 0.4558,
-        "hfopenllm_v2/MMLU-PRO": 0.5456
-      }
-    },
-    {
-      "id": "abacusai/Liberated-Qwen1.5-14B",
-      "name": "Liberated-Qwen1.5-14B",
-      "developer": "abacusai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.3631,
-        "hfopenllm_v2/BBH": 0.4948,
-        "hfopenllm_v2/MATH Level 5": 0.1601,
-        "hfopenllm_v2/GPQA": 0.2836,
-        "hfopenllm_v2/MUSR": 0.4175,
-        "hfopenllm_v2/MMLU-PRO": 0.3512
-      }
-    },
-    {
-      "id": "abacusai/Llama-3-Smaug-8B",
-      "name": "Llama-3-Smaug-8B",
-      "developer": "abacusai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4867,
-        "hfopenllm_v2/BBH": 0.4931,
-        "hfopenllm_v2/MATH Level 5": 0.0853,
-        "hfopenllm_v2/GPQA": 0.2483,
-        "hfopenllm_v2/MUSR": 0.3622,
-        "hfopenllm_v2/MMLU-PRO": 0.3185
-      }
-    },
-    {
-      "id": "abacusai/Smaug-34B-v0.1",
-      "name": "Smaug-34B-v0.1",
-      "developer": "abacusai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.5016,
-        "hfopenllm_v2/BBH": 0.5358,
-        "hfopenllm_v2/MATH Level 5": 0.0718,
-        "hfopenllm_v2/GPQA": 0.3297,
-        "hfopenllm_v2/MUSR": 0.3979,
-        "hfopenllm_v2/MMLU-PRO": 0.4543
-      }
-    },
-    {
-      "id": "abacusai/Smaug-72B-v0.1",
-      "name": "Smaug-72B-v0.1",
-      "developer": "abacusai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.5167,
-        "hfopenllm_v2/BBH": 0.5996,
-        "hfopenllm_v2/MATH Level 5": 0.1911,
-        "hfopenllm_v2/GPQA": 0.3238,
-        "hfopenllm_v2/MUSR": 0.4473,
-        "hfopenllm_v2/MMLU-PRO": 0.4624
-      }
-    },
-    {
-      "id": "abacusai/Smaug-Llama-3-70B-Instruct-32K",
-      "name": "Smaug-Llama-3-70B-Instruct-32K",
-      "developer": "abacusai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7761,
-        "hfopenllm_v2/BBH": 0.6493,
-        "hfopenllm_v2/MATH Level 5": 0.2749,
-        "hfopenllm_v2/GPQA": 0.2961,
-        "hfopenllm_v2/MUSR": 0.4208,
-        "hfopenllm_v2/MMLU-PRO": 0.4765
-      }
-    },
-    {
-      "id": "abacusai/Smaug-Mixtral-v0.1",
-      "name": "Smaug-Mixtral-v0.1",
-      "developer": "abacusai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.5554,
-        "hfopenllm_v2/BBH": 0.5162,
-        "hfopenllm_v2/MATH Level 5": 0.0952,
-        "hfopenllm_v2/GPQA": 0.3012,
-        "hfopenllm_v2/MUSR": 0.4298,
-        "hfopenllm_v2/MMLU-PRO": 0.3352
-      }
-    },
-    {
-      "id": "abacusai/Smaug-Qwen2-72B-Instruct",
-      "name": "Smaug-Qwen2-72B-Instruct",
-      "developer": "abacusai",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7825,
-        "hfopenllm_v2/BBH": 0.691,
-        "hfopenllm_v2/MATH Level 5": 0.4131,
-        "hfopenllm_v2/GPQA": 0.3616,
-        "hfopenllm_v2/MUSR": 0.4401,
-        "hfopenllm_v2/MMLU-PRO": 0.519
-      }
-    }
-  ]
-}

data/developers/abacusresearch.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "developer": "AbacusResearch",
-  "models": [
-    {
-      "id": "AbacusResearch/Jallabi-34B",
-      "name": "Jallabi-34B",
-      "developer": "AbacusResearch",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.3529,
-        "hfopenllm_v2/BBH": 0.6023,
-        "hfopenllm_v2/MATH Level 5": 0.0521,
-        "hfopenllm_v2/GPQA": 0.3389,
-        "hfopenllm_v2/MUSR": 0.4822,
-        "hfopenllm_v2/MMLU-PRO": 0.4682
-      }
-    }
-  ]
-}

data/developers/abhishek.json DELETED Viewed

@@ -1,75 +0,0 @@
-{
-  "developer": "abhishek",
-  "models": [
-    {
-      "id": "abhishek/autotrain-0tmgq-5tpbg",
-      "name": "autotrain-0tmgq-5tpbg",
-      "developer": "abhishek",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.1952,
-        "hfopenllm_v2/BBH": 0.3127,
-        "hfopenllm_v2/MATH Level 5": 0.0128,
-        "hfopenllm_v2/GPQA": 0.2592,
-        "hfopenllm_v2/MUSR": 0.3584,
-        "hfopenllm_v2/MMLU-PRO": 0.1144
-      }
-    },
-    {
-      "id": "abhishek/autotrain-llama3-70b-orpo-v1",
-      "name": "autotrain-llama3-70b-orpo-v1",
-      "developer": "abhishek",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4233,
-        "hfopenllm_v2/BBH": 0.5998,
-        "hfopenllm_v2/MATH Level 5": 0.0106,
-        "hfopenllm_v2/GPQA": 0.2441,
-        "hfopenllm_v2/MUSR": 0.3579,
-        "hfopenllm_v2/MMLU-PRO": 0.1122
-      }
-    },
-    {
-      "id": "abhishek/autotrain-llama3-70b-orpo-v2",
-      "name": "autotrain-llama3-70b-orpo-v2",
-      "developer": "abhishek",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.5406,
-        "hfopenllm_v2/BBH": 0.5899,
-        "hfopenllm_v2/MATH Level 5": 0.2107,
-        "hfopenllm_v2/GPQA": 0.2936,
-        "hfopenllm_v2/MUSR": 0.4113,
-        "hfopenllm_v2/MMLU-PRO": 0.4818
-      }
-    },
-    {
-      "id": "abhishek/autotrain-llama3-orpo-v2",
-      "name": "autotrain-llama3-orpo-v2",
-      "developer": "abhishek",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4372,
-        "hfopenllm_v2/BBH": 0.3159,
-        "hfopenllm_v2/MATH Level 5": 0.0468,
-        "hfopenllm_v2/GPQA": 0.2668,
-        "hfopenllm_v2/MUSR": 0.3792,
-        "hfopenllm_v2/MMLU-PRO": 0.2218
-      }
-    },
-    {
-      "id": "abhishek/autotrain-vr4a1-e5mms",
-      "name": "autotrain-vr4a1-e5mms",
-      "developer": "abhishek",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2142,
-        "hfopenllm_v2/BBH": 0.5001,
-        "hfopenllm_v2/MATH Level 5": 0.1412,
-        "hfopenllm_v2/GPQA": 0.3196,
-        "hfopenllm_v2/MUSR": 0.3891,
-        "hfopenllm_v2/MMLU-PRO": 0.3667
-      }
-    }
-  ]
-}

data/developers/abideen.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "developer": "abideen",
-  "models": [
-    {
-      "id": "abideen/MedPhi-4-14B-v1",
-      "name": "MedPhi-4-14B-v1",
-      "developer": "abideen",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.6277,
-        "hfopenllm_v2/BBH": 0.6897,
-        "hfopenllm_v2/MATH Level 5": 0.2931,
-        "hfopenllm_v2/GPQA": 0.344,
-        "hfopenllm_v2/MUSR": 0.4155,
-        "hfopenllm_v2/MMLU-PRO": 0.5338
-      }
-    }
-  ]
-}

data/developers/adamo1139.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "developer": "adamo1139",
-  "models": [
-    {
-      "id": "adamo1139/Yi-34B-200K-AEZAKMI-v2",
-      "name": "Yi-34B-200K-AEZAKMI-v2",
-      "developer": "adamo1139",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4555,
-        "hfopenllm_v2/BBH": 0.5384,
-        "hfopenllm_v2/MATH Level 5": 0.0566,
-        "hfopenllm_v2/GPQA": 0.3322,
-        "hfopenllm_v2/MUSR": 0.3886,
-        "hfopenllm_v2/MMLU-PRO": 0.4513
-      }
-    }
-  ]
-}

data/developers/adriszmar.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "developer": "adriszmar",
-  "models": [
-    {
-      "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES",
-      "name": "QAIMath-Qwen2.5-7B-TIES",
-      "developer": "adriszmar",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.1746,
-        "hfopenllm_v2/BBH": 0.3126,
-        "hfopenllm_v2/MATH Level 5": 0.0,
-        "hfopenllm_v2/GPQA": 0.245,
-        "hfopenllm_v2/MUSR": 0.4096,
-        "hfopenllm_v2/MMLU-PRO": 0.1087
-      }
-    }
-  ]
-}

data/developers/aellm.json DELETED Viewed

@@ -1,33 +0,0 @@
-{
-  "developer": "AELLM",
-  "models": [
-    {
-      "id": "AELLM/gemma-2-aeria-infinity-9b",
-      "name": "gemma-2-aeria-infinity-9b",
-      "developer": "AELLM",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7594,
-        "hfopenllm_v2/BBH": 0.5983,
-        "hfopenllm_v2/MATH Level 5": 0.2145,
-        "hfopenllm_v2/GPQA": 0.3339,
-        "hfopenllm_v2/MUSR": 0.402,
-        "hfopenllm_v2/MMLU-PRO": 0.3862
-      }
-    },
-    {
-      "id": "AELLM/gemma-2-lyco-infinity-9b",
-      "name": "gemma-2-lyco-infinity-9b",
-      "developer": "AELLM",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7316,
-        "hfopenllm_v2/BBH": 0.584,
-        "hfopenllm_v2/MATH Level 5": 0.1707,
-        "hfopenllm_v2/GPQA": 0.328,
-        "hfopenllm_v2/MUSR": 0.4006,
-        "hfopenllm_v2/MMLU-PRO": 0.3787
-      }
-    }
-  ]
-}

data/developers/aevalone.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "developer": "aevalone",
-  "models": [
-    {
-      "id": "aevalone/distill_qw_test",
-      "name": "distill_qw_test",
-      "developer": "aevalone",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7409,
-        "hfopenllm_v2/BBH": 0.5246,
-        "hfopenllm_v2/MATH Level 5": 0.4781,
-        "hfopenllm_v2/GPQA": 0.3003,
-        "hfopenllm_v2/MUSR": 0.386,
-        "hfopenllm_v2/MMLU-PRO": 0.4092
-      }
-    }
-  ]
-}

data/developers/agentlans.json DELETED Viewed

@@ -1,131 +0,0 @@
-{
-  "developer": "agentlans",
-  "models": [
-    {
-      "id": "agentlans/Gemma2-9B-AdvancedFuse",
-      "name": "Gemma2-9B-AdvancedFuse",
-      "developer": "agentlans",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.1543,
-        "hfopenllm_v2/BBH": 0.5859,
-        "hfopenllm_v2/MATH Level 5": 0.1005,
-        "hfopenllm_v2/GPQA": 0.3347,
-        "hfopenllm_v2/MUSR": 0.4231,
-        "hfopenllm_v2/MMLU-PRO": 0.4
-      }
-    },
-    {
-      "id": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K",
-      "name": "Llama-3.2-1B-Instruct-CrashCourse12K",
-      "developer": "agentlans",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.5395,
-        "hfopenllm_v2/BBH": 0.3548,
-        "hfopenllm_v2/MATH Level 5": 0.071,
-        "hfopenllm_v2/GPQA": 0.2408,
-        "hfopenllm_v2/MUSR": 0.321,
-        "hfopenllm_v2/MMLU-PRO": 0.1809
-      }
-    },
-    {
-      "id": "agentlans/Llama3.1-8B-drill",
-      "name": "Llama3.1-8B-drill",
-      "developer": "agentlans",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7652,
-        "hfopenllm_v2/BBH": 0.5016,
-        "hfopenllm_v2/MATH Level 5": 0.1715,
-        "hfopenllm_v2/GPQA": 0.2676,
-        "hfopenllm_v2/MUSR": 0.3672,
-        "hfopenllm_v2/MMLU-PRO": 0.3776
-      }
-    },
-    {
-      "id": "agentlans/Llama3.1-Daredevilish",
-      "name": "Llama3.1-Daredevilish",
-      "developer": "agentlans",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.6292,
-        "hfopenllm_v2/BBH": 0.5013,
-        "hfopenllm_v2/MATH Level 5": 0.1292,
-        "hfopenllm_v2/GPQA": 0.3012,
-        "hfopenllm_v2/MUSR": 0.4091,
-        "hfopenllm_v2/MMLU-PRO": 0.3697
-      }
-    },
-    {
-      "id": "agentlans/Llama3.1-Daredevilish-Instruct",
-      "name": "Llama3.1-Daredevilish-Instruct",
-      "developer": "agentlans",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7926,
-        "hfopenllm_v2/BBH": 0.5235,
-        "hfopenllm_v2/MATH Level 5": 0.1722,
-        "hfopenllm_v2/GPQA": 0.307,
-        "hfopenllm_v2/MUSR": 0.3911,
-        "hfopenllm_v2/MMLU-PRO": 0.3877
-      }
-    },
-    {
-      "id": "agentlans/Llama3.1-LexiHermes-SuperStorm",
-      "name": "Llama3.1-LexiHermes-SuperStorm",
-      "developer": "agentlans",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7835,
-        "hfopenllm_v2/BBH": 0.5266,
-        "hfopenllm_v2/MATH Level 5": 0.1616,
-        "hfopenllm_v2/GPQA": 0.323,
-        "hfopenllm_v2/MUSR": 0.3963,
-        "hfopenllm_v2/MMLU-PRO": 0.3844
-      }
-    },
-    {
-      "id": "agentlans/Llama3.1-SuperDeepFuse",
-      "name": "Llama3.1-SuperDeepFuse",
-      "developer": "agentlans",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7762,
-        "hfopenllm_v2/BBH": 0.5049,
-        "hfopenllm_v2/MATH Level 5": 0.1828,
-        "hfopenllm_v2/GPQA": 0.2743,
-        "hfopenllm_v2/MUSR": 0.3699,
-        "hfopenllm_v2/MMLU-PRO": 0.3775
-      }
-    },
-    {
-      "id": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K",
-      "name": "Llama3.1-SuperDeepFuse-CrashCourse12K",
-      "developer": "agentlans",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7187,
-        "hfopenllm_v2/BBH": 0.5216,
-        "hfopenllm_v2/MATH Level 5": 0.1805,
-        "hfopenllm_v2/GPQA": 0.3129,
-        "hfopenllm_v2/MUSR": 0.4026,
-        "hfopenllm_v2/MMLU-PRO": 0.3631
-      }
-    },
-    {
-      "id": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout",
-      "name": "Qwen2.5-0.5B-Instruct-CrashCourse-dropout",
-      "developer": "agentlans",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2949,
-        "hfopenllm_v2/BBH": 0.3312,
-        "hfopenllm_v2/MATH Level 5": 0.0423,
-        "hfopenllm_v2/GPQA": 0.2634,
-        "hfopenllm_v2/MUSR": 0.3342,
-        "hfopenllm_v2/MMLU-PRO": 0.1608
-      }
-    }
-  ]
-}

data/developers/agi-0.json DELETED Viewed

@@ -1,47 +0,0 @@
-{
-  "developer": "AGI-0",
-  "models": [
-    {
-      "id": "AGI-0/Art-v0-3B",
-      "name": "Art-v0-3B",
-      "developer": "AGI-0",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.3192,
-        "hfopenllm_v2/BBH": 0.3401,
-        "hfopenllm_v2/MATH Level 5": 0.2462,
-        "hfopenllm_v2/GPQA": 0.2592,
-        "hfopenllm_v2/MUSR": 0.3768,
-        "hfopenllm_v2/MMLU-PRO": 0.1179
-      }
-    },
-    {
-      "id": "AGI-0/Artificium-llama3.1-8B-001",
-      "name": "Artificium-llama3.1-8B-001",
-      "developer": "AGI-0",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.5248,
-        "hfopenllm_v2/BBH": 0.4256,
-        "hfopenllm_v2/MATH Level 5": 0.136,
-        "hfopenllm_v2/GPQA": 0.2659,
-        "hfopenllm_v2/MUSR": 0.3795,
-        "hfopenllm_v2/MMLU-PRO": 0.3182
-      }
-    },
-    {
-      "id": "AGI-0/smartllama3.1-8B-001",
-      "name": "smartllama3.1-8B-001",
-      "developer": "AGI-0",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.3518,
-        "hfopenllm_v2/BBH": 0.467,
-        "hfopenllm_v2/MATH Level 5": 0.1299,
-        "hfopenllm_v2/GPQA": 0.3062,
-        "hfopenllm_v2/MUSR": 0.4386,
-        "hfopenllm_v2/MMLU-PRO": 0.3487
-      }
-    }
-  ]
-}

data/developers/ahdoot.json DELETED Viewed

@@ -1,33 +0,0 @@
-{
-  "developer": "Ahdoot",
-  "models": [
-    {
-      "id": "Ahdoot/StructuredThinker-v0.3-MoreStructure",
-      "name": "StructuredThinker-v0.3-MoreStructure",
-      "developer": "Ahdoot",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4193,
-        "hfopenllm_v2/BBH": 0.4838,
-        "hfopenllm_v2/MATH Level 5": 0.2908,
-        "hfopenllm_v2/GPQA": 0.297,
-        "hfopenllm_v2/MUSR": 0.4158,
-        "hfopenllm_v2/MMLU-PRO": 0.361
-      }
-    },
-    {
-      "id": "Ahdoot/Test_StealthThinker",
-      "name": "Test_StealthThinker",
-      "developer": "Ahdoot",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.422,
-        "hfopenllm_v2/BBH": 0.4647,
-        "hfopenllm_v2/MATH Level 5": 0.179,
-        "hfopenllm_v2/GPQA": 0.2961,
-        "hfopenllm_v2/MUSR": 0.428,
-        "hfopenllm_v2/MMLU-PRO": 0.3597
-      }
-    }
-  ]
-}

data/developers/ahjeong.json DELETED Viewed

@@ -1,33 +0,0 @@
-{
-  "developer": "Ahjeong",
-  "models": [
-    {
-      "id": "Ahjeong/MMPO_Gemma_7b",
-      "name": "Ahjeong/MMPO_Gemma_7b",
-      "developer": "Ahjeong",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "reward-bench/Score": 0.7587,
-        "reward-bench/Chat": 0.9693,
-        "reward-bench/Chat Hard": 0.614,
-        "reward-bench/Safety": 0.7135,
-        "reward-bench/Reasoning": 0.7756,
-        "reward-bench/Prior Sets (0.5 weight)": 0.6831
-      }
-    },
-    {
-      "id": "Ahjeong/MMPO_Gemma_7b_gamma1.1_epoch3",
-      "name": "Ahjeong/MMPO_Gemma_7b_gamma1.1_epoch3",
-      "developer": "Ahjeong",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "reward-bench/Score": 0.7652,
-        "reward-bench/Chat": 0.9721,
-        "reward-bench/Chat Hard": 0.6338,
-        "reward-bench/Safety": 0.7635,
-        "reward-bench/Reasoning": 0.7284,
-        "reward-bench/Prior Sets (0.5 weight)": 0.6913
-      }
-    }
-  ]
-}

data/developers/ahmeda335.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "developer": "ahmeda335",
-  "models": [
-    {
-      "id": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b",
-      "name": "13_outOf_32_pruned_layers_llama3.1-8b",
-      "developer": "ahmeda335",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.1748,
-        "hfopenllm_v2/BBH": 0.2883,
-        "hfopenllm_v2/MATH Level 5": 0.0,
-        "hfopenllm_v2/GPQA": 0.2592,
-        "hfopenllm_v2/MUSR": 0.3803,
-        "hfopenllm_v2/MMLU-PRO": 0.1129
-      }
-    }
-  ]
-}

data/developers/ai-mo.json DELETED Viewed

@@ -1,33 +0,0 @@
-{
-  "developer": "AI-MO",
-  "models": [
-    {
-      "id": "AI-MO/NuminaMath-7B-CoT",
-      "name": "NuminaMath-7B-CoT",
-      "developer": "AI-MO",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2689,
-        "hfopenllm_v2/BBH": 0.4314,
-        "hfopenllm_v2/MATH Level 5": 0.2696,
-        "hfopenllm_v2/GPQA": 0.2659,
-        "hfopenllm_v2/MUSR": 0.3303,
-        "hfopenllm_v2/MMLU-PRO": 0.2868
-      }
-    },
-    {
-      "id": "AI-MO/NuminaMath-7B-TIR",
-      "name": "NuminaMath-7B-TIR",
-      "developer": "AI-MO",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2756,
-        "hfopenllm_v2/BBH": 0.4144,
-        "hfopenllm_v2/MATH Level 5": 0.1609,
-        "hfopenllm_v2/GPQA": 0.2584,
-        "hfopenllm_v2/MUSR": 0.3509,
-        "hfopenllm_v2/MMLU-PRO": 0.2733
-      }
-    }
-  ]
-}

data/developers/ai-sweden-models.json DELETED Viewed

@@ -1,33 +0,0 @@
-{
-  "developer": "AI-Sweden-Models",
-  "models": [
-    {
-      "id": "AI-Sweden-Models/gpt-sw3-40b",
-      "name": "gpt-sw3-40b",
-      "developer": "AI-Sweden-Models",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.147,
-        "hfopenllm_v2/BBH": 0.3268,
-        "hfopenllm_v2/MATH Level 5": 0.0174,
-        "hfopenllm_v2/GPQA": 0.2349,
-        "hfopenllm_v2/MUSR": 0.3632,
-        "hfopenllm_v2/MMLU-PRO": 0.1276
-      }
-    },
-    {
-      "id": "AI-Sweden-Models/Llama-3-8B-instruct",
-      "name": "Llama-3-8B-instruct",
-      "developer": "AI-Sweden-Models",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2401,
-        "hfopenllm_v2/BBH": 0.4173,
-        "hfopenllm_v2/MATH Level 5": 0.0385,
-        "hfopenllm_v2/GPQA": 0.2659,
-        "hfopenllm_v2/MUSR": 0.4771,
-        "hfopenllm_v2/MMLU-PRO": 0.2597
-      }
-    }
-  ]
-}

data/developers/ai2.json DELETED Viewed

@@ -1,89 +0,0 @@
-{
-  "developer": "AI2",
-  "models": [
-    {
-      "id": "ai2/llama-2-chat-7b-nectar-3.8m.json",
-      "name": "ai2/llama-2-chat-7b-nectar-3.8m.json",
-      "developer": "AI2",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "reward-bench/Score": 0.5843,
-        "reward-bench/Chat": 0.8631,
-        "reward-bench/Chat Hard": 0.2654,
-        "reward-bench/Safety": 0.6243
-      }
-    },
-    {
-      "id": "ai2/llama-2-chat-nectar-180k.json",
-      "name": "ai2/llama-2-chat-nectar-180k.json",
-      "developer": "AI2",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "reward-bench/Score": 0.5235,
-        "reward-bench/Chat": 0.8827,
-        "reward-bench/Chat Hard": 0.2851,
-        "reward-bench/Safety": 0.4027
-      }
-    },
-    {
-      "id": "ai2/llama-2-chat-ultrafeedback-60k.jsonl",
-      "name": "ai2/llama-2-chat-ultrafeedback-60k.jsonl",
-      "developer": "AI2",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "reward-bench/Score": 0.644,
-        "reward-bench/Chat": 0.9441,
-        "reward-bench/Chat Hard": 0.4539,
-        "reward-bench/Safety": 0.5338
-      }
-    },
-    {
-      "id": "ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",
-      "name": "ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",
-      "developer": "AI2",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "reward-bench/Score": 0.7008,
-        "reward-bench/Chat": 0.9385,
-        "reward-bench/Chat Hard": 0.3882,
-        "reward-bench/Safety": 0.7757
-      }
-    },
-    {
-      "id": "ai2/tulu-2-7b-rm-v0-nectar-binarized-700k.json",
-      "name": "ai2/tulu-2-7b-rm-v0-nectar-binarized-700k.json",
-      "developer": "AI2",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "reward-bench/Score": 0.7127,
-        "reward-bench/Chat": 0.9358,
-        "reward-bench/Chat Hard": 0.4079,
-        "reward-bench/Safety": 0.7946
-      }
-    },
-    {
-      "id": "ai2/tulu-2-7b-rm-v0-nectar-binarized.json",
-      "name": "ai2/tulu-2-7b-rm-v0-nectar-binarized.json",
-      "developer": "AI2",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "reward-bench/Score": 0.6756,
-        "reward-bench/Chat": 0.9134,
-        "reward-bench/Chat Hard": 0.3904,
-        "reward-bench/Safety": 0.723
-      }
-    },
-    {
-      "id": "ai2/tulu-2-7b-rm-v0.json",
-      "name": "ai2/tulu-2-7b-rm-v0.json",
-      "developer": "AI2",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "reward-bench/Score": 0.6655,
-        "reward-bench/Chat": 0.933,
-        "reward-bench/Chat Hard": 0.4539,
-        "reward-bench/Safety": 0.6095
-      }
-    }
-  ]
-}

data/developers/ai21.json DELETED Viewed

@@ -1,364 +0,0 @@
-{
-  "developer": "ai21",
-  "models": [
-    {
-      "id": "ai21/J1-Grande-v1-17B",
-      "name": "J1-Grande v1 17B",
-      "developer": "ai21",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_classic/Mean win rate": 0.433,
-        "helm_classic/MMLU": 0.27,
-        "helm_classic/BoolQ": 0.722,
-        "helm_classic/NarrativeQA": 0.672,
-        "helm_classic/NaturalQuestions (open-book)": 0.578,
-        "helm_classic/QuAC": 0.362,
-        "helm_classic/HellaSwag": 0.739,
-        "helm_classic/OpenbookQA": 0.52,
-        "helm_classic/TruthfulQA": 0.193,
-        "helm_classic/MS MARCO (TREC)": 0.341,
-        "helm_classic/CNN/DailyMail": 0.143,
-        "helm_classic/XSUM": 0.122,
-        "helm_classic/IMDB": 0.953,
-        "helm_classic/CivilComments": 0.529,
-        "helm_classic/RAFT": 0.658
-      }
-    },
-    {
-      "id": "ai21/J1-Grande-v2-beta-17B",
-      "name": "J1-Grande v2 beta 17B",
-      "developer": "ai21",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_classic/Mean win rate": 0.706,
-        "helm_classic/MMLU": 0.445,
-        "helm_classic/BoolQ": 0.812,
-        "helm_classic/NarrativeQA": 0.725,
-        "helm_classic/NaturalQuestions (open-book)": 0.625,
-        "helm_classic/QuAC": 0.392,
-        "helm_classic/HellaSwag": 0.764,
-        "helm_classic/OpenbookQA": 0.56,
-        "helm_classic/TruthfulQA": 0.306,
-        "helm_classic/MS MARCO (TREC)": 0.46,
-        "helm_classic/CNN/DailyMail": 0.146,
-        "helm_classic/XSUM": 0.152,
-        "helm_classic/IMDB": 0.957,
-        "helm_classic/CivilComments": 0.546,
-        "helm_classic/RAFT": 0.679
-      }
-    },
-    {
-      "id": "ai21/J1-Jumbo-v1-178B",
-      "name": "J1-Jumbo v1 178B",
-      "developer": "ai21",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_classic/Mean win rate": 0.517,
-        "helm_classic/MMLU": 0.259,
-        "helm_classic/BoolQ": 0.776,
-        "helm_classic/NarrativeQA": 0.695,
-        "helm_classic/NaturalQuestions (open-book)": 0.595,
-        "helm_classic/QuAC": 0.358,
-        "helm_classic/HellaSwag": 0.765,
-        "helm_classic/OpenbookQA": 0.534,
-        "helm_classic/TruthfulQA": 0.175,
-        "helm_classic/MS MARCO (TREC)": 0.363,
-        "helm_classic/CNN/DailyMail": 0.144,
-        "helm_classic/XSUM": 0.129,
-        "helm_classic/IMDB": 0.943,
-        "helm_classic/CivilComments": 0.553,
-        "helm_classic/RAFT": 0.681
-      }
-    },
-    {
-      "id": "ai21/J1-Large-v1-7.5B",
-      "name": "J1-Large v1 7.5B",
-      "developer": "ai21",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_classic/Mean win rate": 0.285,
-        "helm_classic/MMLU": 0.241,
-        "helm_classic/BoolQ": 0.683,
-        "helm_classic/NarrativeQA": 0.623,
-        "helm_classic/NaturalQuestions (open-book)": 0.532,
-        "helm_classic/QuAC": 0.328,
-        "helm_classic/HellaSwag": 0.7,
-        "helm_classic/OpenbookQA": 0.514,
-        "helm_classic/TruthfulQA": 0.197,
-        "helm_classic/MS MARCO (TREC)": 0.292,
-        "helm_classic/CNN/DailyMail": 0.134,
-        "helm_classic/XSUM": 0.102,
-        "helm_classic/IMDB": 0.956,
-        "helm_classic/CivilComments": 0.532,
-        "helm_classic/RAFT": 0.545
-      }
-    },
-    {
-      "id": "ai21/j2-grande",
-      "name": "Jurassic-2 Grande 17B",
-      "developer": "ai21",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_lite/Mean win rate": 0.172,
-        "helm_lite/NarrativeQA": 0.744,
-        "helm_lite/NaturalQuestions (closed-book)": 0.35,
-        "helm_lite/OpenbookQA": 0.614,
-        "helm_lite/MMLU": 0.471,
-        "helm_lite/MATH": 0.064,
-        "helm_lite/GSM8K": 0.159,
-        "helm_lite/LegalBench": 0.468,
-        "helm_lite/MedQA": 0.39,
-        "helm_lite/WMT 2014": 0.102
-      }
-    },
-    {
-      "id": "ai21/j2-jumbo",
-      "name": "Jurassic-2 Jumbo 178B",
-      "developer": "ai21",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_lite/Mean win rate": 0.215,
-        "helm_lite/NarrativeQA": 0.728,
-        "helm_lite/NaturalQuestions (closed-book)": 0.385,
-        "helm_lite/OpenbookQA": 0.688,
-        "helm_lite/MMLU": 0.483,
-        "helm_lite/MATH": 0.103,
-        "helm_lite/GSM8K": 0.239,
-        "helm_lite/LegalBench": 0.533,
-        "helm_lite/MedQA": 0.431,
-        "helm_lite/WMT 2014": 0.114
-      }
-    },
-    {
-      "id": "ai21/jamba-1.5-large",
-      "name": "Jamba 1.5 Large",
-      "developer": "ai21",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_lite/Mean win rate": 0.637,
-        "helm_lite/NarrativeQA": 0.664,
-        "helm_lite/NaturalQuestions (closed-book)": 0.394,
-        "helm_lite/OpenbookQA": 0.948,
-        "helm_lite/MMLU": 0.683,
-        "helm_lite/MATH": 0.692,
-        "helm_lite/GSM8K": 0.846,
-        "helm_lite/LegalBench": 0.675,
-        "helm_lite/MedQA": 0.698,
-        "helm_lite/WMT 2014": 0.203,
-        "helm_mmlu/MMLU All Subjects": 0.782,
-        "helm_mmlu/Abstract Algebra": 0.53,
-        "helm_mmlu/Anatomy": 0.793,
-        "helm_mmlu/College Physics": 0.51,
-        "helm_mmlu/Computer Security": 0.8,
-        "helm_mmlu/Econometrics": 0.614,
-        "helm_mmlu/Global Facts": 0.54,
-        "helm_mmlu/Jurisprudence": 0.87,
-        "helm_mmlu/Philosophy": 0.849,
-        "helm_mmlu/Professional Psychology": 0.842,
-        "helm_mmlu/Us Foreign Policy": 0.92,
-        "helm_mmlu/Astronomy": 0.882,
-        "helm_mmlu/Business Ethics": 0.77,
-        "helm_mmlu/Clinical Knowledge": 0.849,
-        "helm_mmlu/Conceptual Physics": 0.779,
-        "helm_mmlu/Electrical Engineering": 0.793,
-        "helm_mmlu/Elementary Mathematics": 0.656,
-        "helm_mmlu/Formal Logic": 0.619,
-        "helm_mmlu/High School World History": 0.911,
-        "helm_mmlu/Human Sexuality": 0.832,
-        "helm_mmlu/International Law": 0.884,
-        "helm_mmlu/Logical Fallacies": 0.859,
-        "helm_mmlu/Machine Learning": 0.688,
-        "helm_mmlu/Management": 0.864,
-        "helm_mmlu/Marketing": 0.94,
-        "helm_mmlu/Medical Genetics": 0.89,
-        "helm_mmlu/Miscellaneous": 0.931,
-        "helm_mmlu/Moral Scenarios": 0.686,
-        "helm_mmlu/Nutrition": 0.869,
-        "helm_mmlu/Prehistory": 0.892,
-        "helm_mmlu/Public Relations": 0.755,
-        "helm_mmlu/Security Studies": 0.771,
-        "helm_mmlu/Sociology": 0.93,
-        "helm_mmlu/Virology": 0.554,
-        "helm_mmlu/World Religions": 0.865,
-        "helm_mmlu/Mean win rate": 0.147
-      }
-    },
-    {
-      "id": "ai21/jamba-1.5-mini",
-      "name": "Jamba 1.5 Mini",
-      "developer": "ai21",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_lite/Mean win rate": 0.414,
-        "helm_lite/NarrativeQA": 0.746,
-        "helm_lite/NaturalQuestions (closed-book)": 0.388,
-        "helm_lite/OpenbookQA": 0.89,
-        "helm_lite/MMLU": 0.582,
-        "helm_lite/MATH": 0.318,
-        "helm_lite/GSM8K": 0.691,
-        "helm_lite/LegalBench": 0.503,
-        "helm_lite/MedQA": 0.632,
-        "helm_lite/WMT 2014": 0.179,
-        "helm_mmlu/MMLU All Subjects": 0.699,
-        "helm_mmlu/Abstract Algebra": 0.33,
-        "helm_mmlu/Anatomy": 0.711,
-        "helm_mmlu/College Physics": 0.48,
-        "helm_mmlu/Computer Security": 0.73,
-        "helm_mmlu/Econometrics": 0.491,
-        "helm_mmlu/Global Facts": 0.43,
-        "helm_mmlu/Jurisprudence": 0.88,
-        "helm_mmlu/Philosophy": 0.752,
-        "helm_mmlu/Professional Psychology": 0.76,
-        "helm_mmlu/Us Foreign Policy": 0.9,
-        "helm_mmlu/Astronomy": 0.822,
-        "helm_mmlu/Business Ethics": 0.76,
-        "helm_mmlu/Clinical Knowledge": 0.74,
-        "helm_mmlu/Conceptual Physics": 0.677,
-        "helm_mmlu/Electrical Engineering": 0.683,
-        "helm_mmlu/Elementary Mathematics": 0.553,
-        "helm_mmlu/Formal Logic": 0.452,
-        "helm_mmlu/High School World History": 0.84,
-        "helm_mmlu/Human Sexuality": 0.809,
-        "helm_mmlu/International Law": 0.893,
-        "helm_mmlu/Logical Fallacies": 0.81,
-        "helm_mmlu/Machine Learning": 0.509,
-        "helm_mmlu/Management": 0.825,
-        "helm_mmlu/Marketing": 0.915,
-        "helm_mmlu/Medical Genetics": 0.69,
-        "helm_mmlu/Miscellaneous": 0.902,
-        "helm_mmlu/Moral Scenarios": 0.269,
-        "helm_mmlu/Nutrition": 0.801,
-        "helm_mmlu/Prehistory": 0.824,
-        "helm_mmlu/Public Relations": 0.727,
-        "helm_mmlu/Security Studies": 0.755,
-        "helm_mmlu/Sociology": 0.876,
-        "helm_mmlu/Virology": 0.578,
-        "helm_mmlu/World Religions": 0.842,
-        "helm_mmlu/Mean win rate": 0.206
-      }
-    },
-    {
-      "id": "ai21/jamba-instruct",
-      "name": "Jamba Instruct",
-      "developer": "ai21",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_lite/Mean win rate": 0.287,
-        "helm_lite/NarrativeQA": 0.658,
-        "helm_lite/NaturalQuestions (closed-book)": 0.384,
-        "helm_lite/OpenbookQA": 0.796,
-        "helm_lite/MMLU": 0.582,
-        "helm_lite/MATH": 0.38,
-        "helm_lite/GSM8K": 0.67,
-        "helm_lite/LegalBench": 0.54,
-        "helm_lite/MedQA": 0.519,
-        "helm_lite/WMT 2014": 0.164,
-        "helm_mmlu/MMLU All Subjects": 0.659,
-        "helm_mmlu/Abstract Algebra": 0.36,
-        "helm_mmlu/Anatomy": 0.615,
-        "helm_mmlu/College Physics": 0.422,
-        "helm_mmlu/Computer Security": 0.76,
-        "helm_mmlu/Econometrics": 0.439,
-        "helm_mmlu/Global Facts": 0.4,
-        "helm_mmlu/Jurisprudence": 0.796,
-        "helm_mmlu/Philosophy": 0.749,
-        "helm_mmlu/Professional Psychology": 0.716,
-        "helm_mmlu/Us Foreign Policy": 0.91,
-        "helm_mmlu/Astronomy": 0.73,
-        "helm_mmlu/Business Ethics": 0.6,
-        "helm_mmlu/Clinical Knowledge": 0.702,
-        "helm_mmlu/Conceptual Physics": 0.677,
-        "helm_mmlu/Electrical Engineering": 0.621,
-        "helm_mmlu/Elementary Mathematics": 0.497,
-        "helm_mmlu/Formal Logic": 0.444,
-        "helm_mmlu/High School World History": 0.797,
-        "helm_mmlu/Human Sexuality": 0.794,
-        "helm_mmlu/International Law": 0.835,
-        "helm_mmlu/Logical Fallacies": 0.706,
-        "helm_mmlu/Machine Learning": 0.536,
-        "helm_mmlu/Management": 0.786,
-        "helm_mmlu/Marketing": 0.885,
-        "helm_mmlu/Medical Genetics": 0.67,
-        "helm_mmlu/Miscellaneous": 0.865,
-        "helm_mmlu/Moral Scenarios": 0.465,
-        "helm_mmlu/Nutrition": 0.745,
-        "helm_mmlu/Prehistory": 0.796,
-        "helm_mmlu/Public Relations": 0.682,
-        "helm_mmlu/Security Studies": 0.743,
-        "helm_mmlu/Sociology": 0.891,
-        "helm_mmlu/Virology": 0.53,
-        "helm_mmlu/World Religions": 0.813,
-        "helm_mmlu/Mean win rate": 0.887
-      }
-    },
-    {
-      "id": "ai21/Jurassic-2-Grande-17B",
-      "name": "Jurassic-2 Grande 17B",
-      "developer": "ai21",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_classic/Mean win rate": 0.743,
-        "helm_classic/MMLU": 0.475,
-        "helm_classic/BoolQ": 0.826,
-        "helm_classic/NarrativeQA": 0.737,
-        "helm_classic/NaturalQuestions (open-book)": 0.639,
-        "helm_classic/QuAC": 0.418,
-        "helm_classic/HellaSwag": 0.781,
-        "helm_classic/OpenbookQA": 0.542,
-        "helm_classic/TruthfulQA": 0.348,
-        "helm_classic/MS MARCO (TREC)": 0.514,
-        "helm_classic/CNN/DailyMail": 0.144,
-        "helm_classic/XSUM": 0.167,
-        "helm_classic/IMDB": 0.938,
-        "helm_classic/CivilComments": 0.547,
-        "helm_classic/RAFT": 0.712
-      }
-    },
-    {
-      "id": "ai21/Jurassic-2-Jumbo-178B",
-      "name": "Jurassic-2 Jumbo 178B",
-      "developer": "ai21",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_classic/Mean win rate": 0.824,
-        "helm_classic/MMLU": 0.48,
-        "helm_classic/BoolQ": 0.829,
-        "helm_classic/NarrativeQA": 0.733,
-        "helm_classic/NaturalQuestions (open-book)": 0.669,
-        "helm_classic/QuAC": 0.435,
-        "helm_classic/HellaSwag": 0.788,
-        "helm_classic/OpenbookQA": 0.558,
-        "helm_classic/TruthfulQA": 0.437,
-        "helm_classic/MS MARCO (TREC)": 0.661,
-        "helm_classic/CNN/DailyMail": 0.149,
-        "helm_classic/XSUM": 0.182,
-        "helm_classic/IMDB": 0.938,
-        "helm_classic/CivilComments": 0.57,
-        "helm_classic/RAFT": 0.746
-      }
-    },
-    {
-      "id": "ai21/Jurassic-2-Large-7.5B",
-      "name": "Jurassic-2 Large 7.5B",
-      "developer": "ai21",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "helm_classic/Mean win rate": 0.553,
-        "helm_classic/MMLU": 0.339,
-        "helm_classic/BoolQ": 0.742,
-        "helm_classic/NarrativeQA": -1.0,
-        "helm_classic/NaturalQuestions (open-book)": 0.589,
-        "helm_classic/QuAC": -1.0,
-        "helm_classic/HellaSwag": 0.729,
-        "helm_classic/OpenbookQA": 0.53,
-        "helm_classic/TruthfulQA": 0.245,
-        "helm_classic/MS MARCO (TREC)": 0.464,
-        "helm_classic/CNN/DailyMail": 0.136,
-        "helm_classic/XSUM": 0.142,
-        "helm_classic/IMDB": 0.956,
-        "helm_classic/CivilComments": 0.57,
-        "helm_classic/RAFT": 0.622
-      }
-    }
-  ]
-}

data/developers/ai21labs.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "developer": "ai21labs",
-  "models": [
-    {
-      "id": "ai21labs/Jamba-v0.1",
-      "name": "Jamba-v0.1",
-      "developer": "ai21labs",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2026,
-        "hfopenllm_v2/BBH": 0.3602,
-        "hfopenllm_v2/MATH Level 5": 0.0159,
-        "hfopenllm_v2/GPQA": 0.2685,
-        "hfopenllm_v2/MUSR": 0.359,
-        "hfopenllm_v2/MMLU-PRO": 0.2492
-      }
-    }
-  ]
-}

data/developers/ai4bharat.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "developer": "ai4bharat",
-  "models": [
-    {
-      "id": "ai4bharat/Airavata",
-      "name": "Airavata",
-      "developer": "ai4bharat",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.0559,
-        "hfopenllm_v2/BBH": 0.3628,
-        "hfopenllm_v2/MATH Level 5": 0.0181,
-        "hfopenllm_v2/GPQA": 0.2743,
-        "hfopenllm_v2/MUSR": 0.3763,
-        "hfopenllm_v2/MMLU-PRO": 0.1635
-      }
-    }
-  ]
-}

data/developers/ai4free.json DELETED Viewed

@@ -1,33 +0,0 @@
-{
-  "developer": "AI4free",
-  "models": [
-    {
-      "id": "AI4free/Dhanishtha",
-      "name": "Dhanishtha",
-      "developer": "AI4free",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.2451,
-        "hfopenllm_v2/BBH": 0.3404,
-        "hfopenllm_v2/MATH Level 5": 0.256,
-        "hfopenllm_v2/GPQA": 0.2525,
-        "hfopenllm_v2/MUSR": 0.3569,
-        "hfopenllm_v2/MMLU-PRO": 0.1643
-      }
-    },
-    {
-      "id": "AI4free/t2",
-      "name": "t2",
-      "developer": "AI4free",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.3867,
-        "hfopenllm_v2/BBH": 0.291,
-        "hfopenllm_v2/MATH Level 5": 0.1896,
-        "hfopenllm_v2/GPQA": 0.2576,
-        "hfopenllm_v2/MUSR": 0.3846,
-        "hfopenllm_v2/MMLU-PRO": 0.1144
-      }
-    }
-  ]
-}

data/developers/aicoressecurity.json DELETED Viewed

@@ -1,61 +0,0 @@
-{
-  "developer": "AicoresSecurity",
-  "models": [
-    {
-      "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0",
-      "name": "Cybernet-Sec-3B-R1-V0",
-      "developer": "AicoresSecurity",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.6358,
-        "hfopenllm_v2/BBH": 0.4497,
-        "hfopenllm_v2/MATH Level 5": 0.1156,
-        "hfopenllm_v2/GPQA": 0.2634,
-        "hfopenllm_v2/MUSR": 0.3314,
-        "hfopenllm_v2/MMLU-PRO": 0.301
-      }
-    },
-    {
-      "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder",
-      "name": "Cybernet-Sec-3B-R1-V0-Coder",
-      "developer": "AicoresSecurity",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.7098,
-        "hfopenllm_v2/BBH": 0.4478,
-        "hfopenllm_v2/MATH Level 5": 0.1488,
-        "hfopenllm_v2/GPQA": 0.2718,
-        "hfopenllm_v2/MUSR": 0.3408,
-        "hfopenllm_v2/MMLU-PRO": 0.3178
-      }
-    },
-    {
-      "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1",
-      "name": "Cybernet-Sec-3B-R1-V1",
-      "developer": "AicoresSecurity",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.6146,
-        "hfopenllm_v2/BBH": 0.4282,
-        "hfopenllm_v2/MATH Level 5": 0.1518,
-        "hfopenllm_v2/GPQA": 0.2609,
-        "hfopenllm_v2/MUSR": 0.3287,
-        "hfopenllm_v2/MMLU-PRO": 0.2876
-      }
-    },
-    {
-      "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1",
-      "name": "Cybernet-Sec-3B-R1-V1.1",
-      "developer": "AicoresSecurity",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.673,
-        "hfopenllm_v2/BBH": 0.4392,
-        "hfopenllm_v2/MATH Level 5": 0.176,
-        "hfopenllm_v2/GPQA": 0.271,
-        "hfopenllm_v2/MUSR": 0.3541,
-        "hfopenllm_v2/MMLU-PRO": 0.3088
-      }
-    }
-  ]
-}

data/developers/aidc-ai.json DELETED Viewed

@@ -1,19 +0,0 @@
-{
-  "developer": "AIDC-AI",
-  "models": [
-    {
-      "id": "AIDC-AI/Marco-o1",
-      "name": "Marco-o1",
-      "developer": "AIDC-AI",
-      "evaluator_relationship": null,
-      "benchmark_scores": {
-        "hfopenllm_v2/IFEval": 0.4771,
-        "hfopenllm_v2/BBH": 0.5364,
-        "hfopenllm_v2/MATH Level 5": 0.3746,
-        "hfopenllm_v2/GPQA": 0.2592,
-        "hfopenllm_v2/MUSR": 0.4138,
-        "hfopenllm_v2/MMLU-PRO": 0.4117
-      }
-    }
-  ]
-}