'use client'

import { useState } from 'react'
import BenchmarkExplainer from '@/components/BenchmarkExplainer'
import LeaderboardTable from '@/components/LeaderboardTable'
import GuardrailsTable from '@/components/GuardrailsTable'
import GuardrailDistribution from '@/components/GuardrailDistribution'
import AboutSection from '@/components/AboutSection'
import ScoreDistribution from '@/components/ScoreDistribution'
import InsightsSection from '@/components/InsightsSection'
import ScrollReveal from '@/components/ScrollReveal'
import type { ModelData, GuardrailData, MetricThresholds, GuardrailThresholds } from '@/lib/types'

/** The two views this shell switches between. */
type Mode = 'models' | 'guardrails'

/** One entry of the mode toggle; `key` is tied to `Mode` so a typo fails to compile. */
interface ToggleOption {
  key: Mode
  label: string
  subtitle: string
}

/**
 * Static toggle entries. `subtitle` is only a placeholder here: the component
 * overwrites it on every render with text built from its count props.
 */
const TOGGLE_OPTIONS: readonly ToggleOption[] = [
  { key: 'models', label: 'LLM Models', subtitle: '' },
  { key: 'guardrails', label: 'Safety Guardrails', subtitle: '' },
]

/**
 * Props for `EvaluationShell`.
 *
 * Only `modelCount` and `guardrailCount` are read by the visible code (they are
 * interpolated into the toggle subtitles). The remaining props are presumably
 * forwarded to the child tables/charts — TODO confirm against the markup.
 */
interface Props {
  models: ModelData[]
  guardrails: GuardrailData[]
  maxFairness: number
  thresholds: MetricThresholds
  guardrailThresholds: GuardrailThresholds
  /** Number shown in the "Evaluating N models…" subtitle. */
  modelCount: number
  /** Number shown in the "Evaluating N guardrails…" subtitle. */
  guardrailCount: number
}

/**
 * Client-side shell that switches the results and leaderboard copy between the
 * "models" view and the "guardrails" view.
 *
 * The selected view is held in local state and starts as `'models'`.
 */
export default function EvaluationShell({
  models,
  guardrails,
  maxFairness,
  thresholds,
  guardrailThresholds,
  modelCount,
  guardrailCount,
}: Props) {
  // Explicit type argument: without it the state is inferred as `string`,
  // so any string could be stored as the mode.
  const [mode, setMode] = useState<Mode>('models')

  // Fill in the per-mode subtitle from the counts supplied by the caller.
  const options = TOGGLE_OPTIONS.map(o => ({
    ...o,
    subtitle:
      o.key === 'models'
        ? `Evaluating ${modelCount} models across 3 dimensions`
        : `Evaluating ${guardrailCount} guardrails on localised content safety`,
  }))

  // NOTE(review): in this view the fragment's children contain no element tags,
  // the final ternary has empty branches and the fragment is never closed —
  // confirm against the original markup before relying on anything below.
  return ( <>

Results

{mode === 'models' ? 'Score distribution across evaluation dimensions.' : 'Score distribution across detection metrics.'}

{mode === 'models' && (

Where models cluster

Each dot is one active model, coloured by lab. The dashed line marks the field average. Fairness is inverted: dots further right are more equitable.

)} {mode === 'guardrails' && (

Where guardrails cluster

Each dot is one guardrail, coloured by provider. The dashed line marks the field average. All metrics: higher is better.

)}

Leaderboard

{mode === 'models' ? 'Click any row to expand the full metric breakdown. Click a creator badge to filter. Fairness: ↓ lower = more equitable.' : 'Click any row to expand the full metric breakdown. Click a provider badge to filter. Higher scores = better detection.'}

{mode === 'models' ? : }

) }