NowAI-Bench / index.html
bradnow's picture
Update leaderboard: EVA pass@1 ranking, top-3 cards, footer alignment
85f0b3a
Raw
History Blame Contribute Delete
25.2 kB
<!DOCTYPE html>
<html class="light" lang="en">
<head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<title>NOWAI-Bench | Enterprise AI Benchmarking</title>
<meta content="An open benchmarking suite for enterprise AI agents." name="description"/>
<!-- Open connections to the font origins early; the gstatic one serves the font files and is fetched in CORS mode. -->
<link href="https://fonts.googleapis.com" rel="preconnect"/>
<link crossorigin href="https://fonts.gstatic.com" rel="preconnect"/>
<!-- NOTE(review): this is the Tailwind Play CDN build, which Tailwind documents as intended for development only; consider shipping a compiled stylesheet instead. -->
<script src="https://cdn.tailwindcss.com?plugins=forms,container-queries"></script>
<link href="https://fonts.googleapis.com/css2?family=Hanken+Grotesk:wght@300;400;500;600;700&amp;family=JetBrains+Mono:wght@400;500&amp;display=swap" rel="stylesheet"/>
<link href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&amp;display=swap" rel="stylesheet"/>
<link rel="stylesheet" href="style.css"/>
<script id="tailwind-config">
// Theme configuration for the Tailwind script that is loaded from the CDN earlier in <head>.
tailwind.config = {
// "class" strategy: dark: variants follow a class in the markup rather than the OS setting (per Tailwind docs); <html> ships with class="light".
darkMode: "class",
theme: {
// Keys under `extend` are merged into Tailwind's default theme rather than replacing it.
extend: {
// Named colour tokens; the markup uses them in utilities such as text-infinite-blue, border-border-subtle and bg-surface-muted.
"colors": {
"surface-muted": "#F4F7F9",
"tertiary-container": "#402302",
"error": "#ba1a1a",
"surface": "#faf9fb",
"surface-container-highest": "#e3e2e4",
"on-tertiary-fixed-variant": "#623f1b",
"error-container": "#ffdad6",
"surface-dim": "#dadadc",
"secondary": "#056e00",
"surface-container-high": "#e8e8ea",
"border-subtle": "#E1E8ED",
"secondary-container": "#80fe68",
"on-tertiary": "#ffffff",
"surface-tint": "#406279",
"on-primary-fixed-variant": "#284a61",
"inverse-primary": "#a8cbe6",
"infinite-blue": "#032D42",
"background": "#faf9fb",
"tertiary-fixed": "#ffdcbe",
"on-secondary": "#ffffff",
"tertiary-fixed-dim": "#efbd8e",
"surface-variant": "#e3e2e4",
"on-background": "#1a1c1d",
"surface-container": "#eeedef",
"primary-container": "#032d42",
"primary": "#001725",
"outline": "#72787d",
"surface-container-lowest": "#ffffff",
"surface-bright": "#faf9fb",
"tertiary": "#241100",
"secondary-fixed-dim": "#64e04f",
"wasabi-green": "#63DF4E",
"on-tertiary-fixed": "#2d1600",
"on-secondary-container": "#057500",
"outline-variant": "#c2c7cd",
"on-error": "#ffffff",
"on-error-container": "#93000a",
"on-surface": "#1a1c1d",
"data-neutral": "#64748B",
"on-primary": "#ffffff",
"on-secondary-fixed-variant": "#035300",
"on-tertiary-container": "#b5885d",
"primary-fixed-dim": "#a8cbe6",
"on-secondary-fixed": "#012200",
"on-primary-fixed": "#001e2e",
"on-primary-container": "#7395ae",
"surface-container-low": "#f4f3f5",
"secondary-fixed": "#80fe68",
"inverse-on-surface": "#f1f0f2",
"inverse-surface": "#2f3032",
"primary-fixed": "#c8e6ff",
"on-surface-variant": "#42474c",
"bright-blue": "#52B8FF",
"bright-indigo": "#7661FF"
},
// NOTE(review): "full" is redefined as 0.75rem, so `rounded-full` is a fixed radius on this page instead of Tailwind's default pill value — confirm this is intended.
"borderRadius": {
"DEFAULT": "0.125rem",
"lg": "0.25rem",
"xl": "0.5rem",
"full": "0.75rem"
}
},
// Declared on `theme` directly (not under `extend`), so per Tailwind's theme rules this replaces the default font-family map: only `sans` and `mono` are defined.
// NOTE(review): no @font-face for the ServiceNow faces is visible in this file — presumably style.css provides them; otherwise the next family in each list is used.
fontFamily: {
'sans': ['ServiceNow Sans', 'Hanken Grotesk', 'sans-serif'],
'mono': ['ServiceNow Sans Mono', 'JetBrains Mono', 'monospace'],
}
},
}
</script>
</head>
<body class="font-sans">
<!-- Decorative background watermark: purely visual, so it is hidden from assistive technology. -->
<div class="fixed top-0 right-0 -z-10 pointer-events-none opacity-[0.07] overflow-hidden w-[800px] h-[960px] watermark-container" style="transform: translate(25%, -10%) rotate(-15deg) scale(0.8);" aria-hidden="true">
<svg class="w-full h-full" viewBox="0 0 100 100" xmlns="http://www.w3.org/2000/svg">
<rect class="wave-1" fill="#032D42" height="22" rx="3" width="22" x="10" y="10"></rect>
<rect class="wave-2" fill="#7661FF" height="22" rx="3" width="22" x="39" y="10"></rect>
<rect class="wave-3" fill="#63DF4E" height="22" rx="3" width="22" x="68" y="10"></rect>
<rect class="wave-2" fill="#52B8FF" height="22" rx="3" width="22" x="10" y="39"></rect>
<rect class="wave-3" fill="#032D42" fill-opacity="0.6" height="22" rx="3" width="22" x="39" y="39"></rect>
<rect class="wave-4" fill="#032D42" height="22" rx="3" width="22" x="68" y="39"></rect>
<rect class="wave-3" fill="#63DF4E" fill-opacity="0.4" height="22" rx="3" width="22" x="10" y="68"></rect>
<rect class="wave-4" fill="#7661FF" fill-opacity="0.8" height="22" rx="3" width="22" x="39" y="68"></rect>
<rect class="wave-5" fill="#032D42" height="22" rx="3" width="22" x="68" y="68"></rect>
</svg>
</div>
<div class="max-w-[1280px] mx-auto px-6 md:px-8 pb-16">
<!-- Top navigation -->
<nav class="flex items-center justify-between py-6 border-b border-border-subtle mb-10">
<div class="flex items-center gap-4">
<span class="flex items-center">
<svg aria-label="ServiceNow" height="20" viewbox="0 0 132 20" width="132" xmlns="http://www.w3.org/2000/svg"><path d="M32.022,6.391a5.626,5.626,0,0,0-3.61,1.3V6.523H25.119V19.35h3.425v-8.2a4.073,4.073,0,0,1,3.109-1.588,3.494,3.494,0,0,1,1.374.206V6.479a5.851,5.851,0,0,0-1.005-.088" fill="#032D42" fill-rule="evenodd"></path><path d="M2.16,15.436a5.369,5.369,0,0,0,3.5,1.269c.922,0,1.633-.45,1.633-1.084,0-1.931-6.191-1.243-6.191-5.369,0-2.459,2.371-3.993,4.9-3.993a7.9,7.9,0,0,1,4.32,1.3L8.72,10.04a4.3,4.3,0,0,0-2.45-.872c-.948,0-1.739.37-1.739,1.031,0,1.666,6.192,1.005,6.192,5.448,0,2.46-2.4,3.967-5.085,3.967A8.815,8.815,0,0,1,.5,17.9Z" fill="#032D42" fill-rule="evenodd"></path><path d="M23.657,12.817c0-3.57-2.5-6.558-6.034-6.558-3.794,0-6.218,3.12-6.218,6.691A6.4,6.4,0,0,0,18.1,19.614a6.919,6.919,0,0,0,5.243-2.3l-1.95-1.957a4.515,4.515,0,0,1-3.214,1.481A3.36,3.36,0,0,1,14.725,13.8h8.853A5.735,5.735,0,0,0,23.657,12.817Zm-8.774-1.533a2.775,2.775,0,0,1,2.74-2.248,2.552,2.552,0,0,1,2.53,2.248Z" fill="#032D42" fill-rule="evenodd"></path><polygon fill="#032D42" fill-rule="evenodd" points="41.19 14.351 44.694 6.523 48.252 6.523 42.376 19.35 40.005 19.35 34.129 6.523 37.686 6.523 41.19 14.351"></polygon><path d="M51.128.5A2.2,2.2,0,1,1,48.888,2.7,2.2,2.2,0,0,1,51.128.5" fill="#032D42" fill-rule="evenodd"></path><rect fill="#032D42" height="12.827" width="3.425" x="49.415" y="6.523"></rect><path d="M67,16.731a6.766,6.766,0,0,1-5.8,2.883,6.68,6.68,0,1,1,.026-13.355,6.808,6.808,0,0,1,5.375,2.565l-2.424,2.142a3.7,3.7,0,0,0-2.951-1.534A3.433,3.433,0,0,0,57.78,12.95a3.383,3.383,0,0,0,3.531,3.49,3.741,3.741,0,0,0,3.056-1.692Z" fill="#032D42" fill-rule="evenodd"></path><path d="M79.442,17.313a6.918,6.918,0,0,1-5.243,2.3,6.4,6.4,0,0,1-6.692-6.664c0-3.571,2.424-6.691,6.218-6.691,3.53,0,6.033,2.988,6.033,6.558a5.635,5.635,0,0,1-.079.979H70.826a3.36,3.36,0,0,0,3.452,3.041,4.52,4.52,0,0,0,3.215-1.481Zm-3.188-6.029a2.551,2.551,0,0,0-2.529-2.248,2.774,2.774,0,0,0-2.74,2.248Z" 
fill="#032D42" fill-rule="evenodd"></path><path d="M81.12,19.35V6.523h3.293V7.554a5.625,5.625,0,0,1,3.609-1.295,5.747,5.747,0,0,1,4.427,2.063,6.482,6.482,0,0,1,1.317,4.5V19.35H90.341v-6.8a3.11,3.11,0,0,0-.764-2.407,2.69,2.69,0,0,0-1.923-.714,4.076,4.076,0,0,0-3.109,1.587V19.35Z" fill="#032D42"></path><path d="M102.586,6.259A7.5,7.5,0,0,0,97.419,19.21a1.481,1.481,0,0,0,1.926.1,5.355,5.355,0,0,1,6.394,0,1.485,1.485,0,0,0,1.937-.113,7.5,7.5,0,0,0-5.09-12.94M102.542,17.5a3.637,3.637,0,0,1-3.734-3.733,3.734,3.734,0,1,1,7.468,0,3.637,3.637,0,0,1-3.734,3.733" fill="#62d84e" fill-rule="evenodd"></path><polygon fill="#032D42" fill-rule="evenodd" points="116.788 19.35 114.237 19.35 109.15 6.523 112.57 6.523 115.359 13.853 118.094 6.523 120.952 6.523 123.662 13.853 126.475 6.523 129.896 6.523 124.809 19.35 122.258 19.35 119.523 12.046 116.788 19.35"></polygon><path d="M129.935,17.9h-.289v.543H129.4V16.957h.6a.476.476,0,0,1,.478.478.469.469,0,0,1-.295.432l.338.579h-.268Zm-.289-.219H130a.249.249,0,0,0,0-.5h-.351Z" fill="#032D42"></path><path d="M129.9,16.348a1.379,1.379,0,1,1-1.378,1.379,1.381,1.381,0,0,1,1.378-1.379m0-.218a1.6,1.6,0,1,0,1.6,1.6,1.6,1.6,0,0,0-1.6-1.6Z" fill="#032D42"></path></svg>
</span>
<span class="w-[1px] h-6 bg-border-subtle"></span>
<span class="flex items-center gap-2">
<!-- width/height mirror the w-6/h-6 utilities (24px) so the slot is reserved before the runtime-generated styles apply. -->
<img alt="NOWAI-Bench Logo" class="w-6 h-6" src="assets/nowai-bench-logo.svg" width="24" height="24"/>
<span class="text-infinite-blue font-bold tracking-tight text-[15px]">NOWAI-Bench</span>
</span>
</div>
</nav>
<!-- Hero section -->
<header class="mb-12">
<h1 class="text-infinite-blue font-bold text-5xl md:text-6xl tracking-tighter mb-4 leading-none">NOWAI-Bench</h1>
<p class="text-on-surface-variant text-lg md:text-xl font-light max-w-2xl">An open benchmarking suite for enterprise AI agents.</p>
</header>
<!-- Compact stat strip -->
<div class="grid grid-cols-2 sm:flex sm:gap-16 py-8 border-y border-border-subtle mb-16">
<div>
<div class="text-infinite-blue font-bold text-4xl tabular-nums leading-none">2</div>
<div class="font-mono text-[11px] text-on-surface-variant uppercase tracking-[0.12em] mt-3">Benchmarks</div>
</div>
<div>
<div class="text-infinite-blue font-bold text-4xl tabular-nums leading-none flex items-baseline">
2026<span class="text-on-surface-variant text-base font-medium ml-1">Q2</span>
</div>
<div class="font-mono text-[11px] text-on-surface-variant uppercase tracking-[0.12em] mt-3">Last updated</div>
</div>
</div>
<!-- Overview section -->
<section class="mb-20">
<h2 class="text-infinite-blue font-bold text-3xl tracking-tight mb-8">Overview</h2>
<div class="prose max-w-3xl">
<p class="text-infinite-blue text-[16px] leading-[1.7] mb-5">
NOWAI-Bench is a coordinated, multi-benchmark effort by ServiceNow to measure whether AI agents perform reliably across the workflows, modalities, and governance demands of real enterprises. Rather than a single test, it is an expanding portfolio of benchmarks—each targeting a distinct slice of what enterprise agents are asked to do.
</p>
<p class="text-infinite-blue text-[16px] leading-[1.7] mb-5">
The current release covers two slices: <strong class="font-bold">EnterpriseOps-Gym</strong> evaluates long-horizon task agents across eight enterprise domains, and <strong class="font-bold">EVA-Bench</strong> evaluates voice agents on both task accuracy and conversational experience. Together they span text-based multi-step workflow execution and governed voice interaction—two of the most common deployment patterns for enterprise agents today.
</p>
<p class="text-infinite-blue text-[16px] leading-[1.7]">
This page describes the currently released benchmarks and how to read their results. It is intended to stay live: as new benchmarks land, the page is updated to reflect them.
</p>
</div>
</section>
<!-- Capability leaders header -->
<div class="flex flex-col md:flex-row md:items-end md:justify-between gap-6 mb-8">
<div>
<h2 class="text-infinite-blue font-bold text-3xl tracking-tight mb-2">Benchmark Leaderboard</h2>
<p class="text-on-surface-variant font-light text-[15px]">A high-level overview of each NOWAI-Bench benchmark.</p>
</div>
<div class="md:text-right">
<span class="inline-block bg-infinite-blue text-white font-mono text-[11px] px-3 py-1.5 rounded uppercase tracking-wider">v1.0</span>
</div>
</div>
<!-- Benchmark cards -->
<div class="grid grid-cols-1 md:grid-cols-2 gap-8">
<!-- EnterpriseOps-Gym -->
<div class="bg-white border border-border-subtle rounded-2xl p-8 card-shadow border-l-[4px] accent-infinite flex flex-col">
<div class="flex items-center gap-3 mb-8">
<span class="w-3 h-3 rounded-full fill-infinite"></span>
<h3 class="text-2xl tracking-tight"><a class="title-anim font-bold" href="https://enterpriseops-gym.github.io/" target="_blank" rel="noopener">EnterpriseOps-Gym</a></h3>
<span class="tooltip">
<button type="button" class="tooltip-trigger" aria-label="About EnterpriseOps-Gym" aria-describedby="tt-eog">
<span class="material-symbols-outlined" style="font-size:18px" aria-hidden="true">info</span>
</button>
<span class="tooltip-bubble" role="tooltip" id="tt-eog">Long-horizon task agents evaluated across eight enterprise domains.</span>
</span>
<span class="text-on-surface-variant text-xs font-medium bg-surface-muted px-2 py-0.5 rounded">long-horizon task agents</span>
</div>
<div class="mb-6">
<div class="font-mono text-[10px] text-on-surface-variant uppercase tracking-[0.15em] mb-2">Top model</div>
<div id="eog-top-name" class="text-infinite-blue font-bold text-xl leading-tight">Claude Opus 4.5</div>
<div id="eog-top-org" class="text-on-surface-variant text-sm font-medium">Anthropic</div>
</div>
<div class="bg-[#f0f9f3] border-l-4 border-wasabi-green rounded-xl p-4 flex items-center gap-3 mb-8">
<span id="eog-top-score" class="text-infinite-blue font-bold text-3xl tabular-nums leading-none">37.4<span class="text-on-surface-variant text-lg font-medium ml-0.5">%</span></span>
<span class="font-mono text-[10px] text-on-surface-variant uppercase tracking-wider font-medium">Task Success Rate · Oracle mode</span>
<span class="tooltip">
<button type="button" class="tooltip-trigger" aria-label="Task Success Rate definition" aria-describedby="tt-eog-metric">
<span class="material-symbols-outlined" style="font-size:16px" aria-hidden="true">info</span>
</button>
<span class="tooltip-bubble" role="tooltip" id="tt-eog-metric">A task passes only if all verification conditions are met.</span>
</span>
</div>
<div id="eog-runners" class="space-y-4 pb-6">
<!-- Runner 2 -->
<div class="grid grid-cols-[20px_1fr_100px_45px] items-center gap-3">
<span class="text-on-surface-variant text-xs tabular-nums font-medium">2</span>
<span class="text-on-surface text-sm font-medium">GPT-5.4</span>
<div class="h-1.5 w-full bg-infinite-soft rounded-full overflow-hidden">
<div class="h-full fill-infinite rounded-full" style="width: 93%;"></div>
</div>
<span class="text-infinite-blue text-sm font-bold text-right tabular-nums">34.8</span>
</div>
<!-- Runner 3 -->
<div class="grid grid-cols-[20px_1fr_100px_45px] items-center gap-3">
<span class="text-on-surface-variant text-xs tabular-nums font-medium">3</span>
<span class="text-on-surface text-sm font-medium">Gemini 3 Pro</span>
<div class="h-1.5 w-full bg-infinite-soft rounded-full overflow-hidden">
<div class="h-full fill-infinite rounded-full" style="width: 83%;"></div>
</div>
<span class="text-infinite-blue text-sm font-bold text-right tabular-nums">31.2</span>
</div>
<!-- Runner 4 -->
<div class="grid grid-cols-[20px_1fr_100px_45px] items-center gap-3">
<span class="text-on-surface-variant text-xs tabular-nums font-medium">4</span>
<span class="text-on-surface text-sm font-medium">Claude Sonnet 4.6</span>
<div class="h-1.5 w-full bg-infinite-soft rounded-full overflow-hidden">
<div class="h-full fill-infinite rounded-full" style="width: 76%;"></div>
</div>
<span class="text-infinite-blue text-sm font-bold text-right tabular-nums">28.6</span>
</div>
<!-- Runner 5 -->
<div class="grid grid-cols-[20px_1fr_100px_45px] items-center gap-3">
<span class="text-on-surface-variant text-xs tabular-nums font-medium">5</span>
<span class="text-on-surface text-sm font-medium">Model Five (placeholder)</span>
<div class="h-1.5 w-full bg-infinite-soft rounded-full overflow-hidden">
<div class="h-full fill-infinite rounded-full" style="width: 70%;"></div>
</div>
<span class="text-infinite-blue text-sm font-bold text-right tabular-nums">26.0</span>
</div>
</div>
<div class="mt-auto pt-6 border-t border-border-subtle">
<a class="text-infinite-blue font-bold text-sm hover:text-[#006DAA] transition-colors flex items-center gap-2 group" href="https://enterpriseops-gym.github.io/#leaderboard" target="_blank" rel="noopener">
View full EOG leaderboard
<!-- Icon-font ligature is decorative; hide it so "arrow_outward" is not read as part of the link name. -->
<span class="material-symbols-outlined text-sm transition-transform group-hover:translate-x-0.5 group-hover:-translate-y-0.5" aria-hidden="true">arrow_outward</span>
</a>
</div>
</div>
<!-- EVA-Bench -->
<div class="bg-white border border-border-subtle rounded-2xl p-8 card-shadow border-l-[4px] accent-indigo flex flex-col">
<div class="flex items-center gap-3 mb-8">
<span class="w-3 h-3 rounded-full fill-indigo"></span>
<h3 class="text-2xl tracking-tight"><a class="title-anim font-bold" href="https://servicenow.github.io/eva/" target="_blank" rel="noopener">EVA-Bench</a></h3>
<span class="tooltip">
<button type="button" class="tooltip-trigger" aria-label="About EVA-Bench" aria-describedby="tt-eva">
<span class="material-symbols-outlined" style="font-size:18px" aria-hidden="true">info</span>
</button>
<span class="tooltip-bubble" role="tooltip" id="tt-eva">Voice agents evaluated on task accuracy and conversational experience.</span>
</span>
<span class="text-on-surface-variant text-xs font-medium bg-surface-muted px-2 py-0.5 rounded">voice agents · accuracy and experience</span>
</div>
<div class="space-y-6 pb-6">
<!-- Accuracy Split -->
<div>
<div class="flex justify-between items-baseline mb-3">
<span class="font-mono text-[10px] text-[#5240CC] uppercase tracking-[0.15em] font-bold">EVA-Accuracy</span>
<span class="flex items-center gap-1">
<span class="font-mono text-[10px] text-on-surface-variant uppercase tracking-wider">Pass@1</span>
<span class="tooltip">
<button type="button" class="tooltip-trigger" aria-label="EVA-Accuracy definition" aria-describedby="tt-eva-acc">
<span class="material-symbols-outlined" style="font-size:15px" aria-hidden="true">info</span>
</button>
<span class="tooltip-bubble" role="tooltip" id="tt-eva-acc">Scores for accuracy. All values normalized to 0–1 (higher is better). 95% bootstrap confidence intervals shown for each value.</span>
</span>
</span>
</div>
<div class="bg-[#f2f0ff] border-l-4 border-bright-indigo rounded-lg p-3 flex justify-between items-center mb-3">
<div>
<div id="eva-acc-name" class="text-infinite-blue font-bold text-base leading-tight">Nova + GPT-5.4 + Sonic</div>
<div id="eva-acc-sub" class="text-on-surface-variant text-[11px] font-medium">Mixed Models · Cascade</div>
</div>
<span id="eva-acc-score" class="text-infinite-blue font-bold text-2xl tabular-nums">0.41</span>
</div>
<div id="eva-acc-runners" class="space-y-2.5">
<div class="grid grid-cols-[18px_1fr_80px_35px] items-center gap-2 text-xs">
<span class="text-on-surface-variant tabular-nums font-medium">2</span>
<span class="text-on-surface font-medium truncate">Claude Opus 4.5</span>
<div class="h-1 w-full bg-indigo-soft rounded-full overflow-hidden">
<div class="h-full fill-indigo rounded-full" style="width: 78%;"></div>
</div>
<span class="text-infinite-blue font-bold text-right tabular-nums">0.32</span>
</div>
<div class="grid grid-cols-[18px_1fr_80px_35px] items-center gap-2 text-xs">
<span class="text-on-surface-variant tabular-nums font-medium">3</span>
<span class="text-on-surface font-medium truncate">Scribe+Gemini-3-Flash</span>
<div class="h-1 w-full bg-indigo-soft rounded-full overflow-hidden">
<div class="h-full fill-indigo rounded-full" style="width: 76%;"></div>
</div>
<span class="text-infinite-blue font-bold text-right tabular-nums">0.31</span>
</div>
</div>
</div>
<!-- Experience Split -->
<div class="pt-6 border-t border-dashed border-border-subtle">
<div class="flex justify-between items-baseline mb-3">
<span class="font-mono text-[10px] text-[#006DAA] uppercase tracking-[0.15em] font-bold">EVA-Experience</span>
<span class="flex items-center gap-1">
<span class="font-mono text-[10px] text-on-surface-variant uppercase tracking-wider">Pass@1</span>
<span class="tooltip">
<button type="button" class="tooltip-trigger" aria-label="EVA-Experience definition" aria-describedby="tt-eva-exp">
<span class="material-symbols-outlined" style="font-size:15px" aria-hidden="true">info</span>
</button>
<span class="tooltip-bubble" role="tooltip" id="tt-eva-exp">Scores for conversational experience. All values normalized to 0–1 (higher is better). 95% bootstrap confidence intervals shown for each value.</span>
</span>
</span>
</div>
<div class="bg-[#f0f9ff] border-l-4 border-bright-blue rounded-lg p-3 flex justify-between items-center mb-3">
<div>
<div id="eva-exp-name" class="text-infinite-blue font-bold text-base leading-tight">Gemini Live</div>
<div id="eva-exp-sub" class="text-on-surface-variant text-[11px] font-medium">Google · Speech-to-Speech</div>
</div>
<span id="eva-exp-score" class="text-infinite-blue font-bold text-2xl tabular-nums">0.49</span>
</div>
<div id="eva-exp-runners" class="space-y-2.5">
<div class="grid grid-cols-[18px_1fr_80px_35px] items-center gap-2 text-xs">
<span class="text-on-surface-variant tabular-nums font-medium">2</span>
<span class="text-on-surface font-medium truncate">GPT-Realtime</span>
<div class="h-1 w-full bg-blue-soft rounded-full overflow-hidden">
<div class="h-full fill-blue rounded-full" style="width: 96%;"></div>
</div>
<span class="text-infinite-blue font-bold text-right tabular-nums">0.47</span>
</div>
<div class="grid grid-cols-[18px_1fr_80px_35px] items-center gap-2 text-xs">
<span class="text-on-surface-variant tabular-nums font-medium">3</span>
<span class="text-on-surface font-medium truncate">Whisper+Qwen 3.5</span>
<div class="h-1 w-full bg-blue-soft rounded-full overflow-hidden">
<div class="h-full fill-blue rounded-full" style="width: 88%;"></div>
</div>
<span class="text-infinite-blue font-bold text-right tabular-nums">0.43</span>
</div>
</div>
</div>
</div>
<div class="mt-auto pt-6 border-t border-border-subtle">
<a class="text-infinite-blue font-bold text-sm hover:text-[#006DAA] transition-colors flex items-center gap-2 group" href="https://servicenow.github.io/eva/#results" target="_blank" rel="noopener">
View full EVA leaderboard
<!-- Icon-font ligature is decorative; hide it so "arrow_outward" is not read as part of the link name. -->
<span class="material-symbols-outlined text-sm transition-transform group-hover:translate-x-0.5 group-hover:-translate-y-0.5" aria-hidden="true">arrow_outward</span>
</a>
</div>
</div>
</div>
<!-- Footer -->
<footer class="mt-24 pt-8 border-t border-border-subtle text-center">
<p class="text-on-surface-variant text-[11px] font-light uppercase tracking-widest">
&copy; 2026 ServiceNow, Inc. All rights reserved.
</p>
</footer>
</div>
<script>
// Overlay synced leaderboard data on the static fallback markup.
// On any failure, the static markup already in the DOM remains.
(function () {
/**
 * HTML-escape a value for interpolation into markup.
 * The value is stringified first, then &, <, >, " and ' are replaced by entities.
 */
function esc(s) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  return String(s).replace(/[&<>"']/g, function (ch) {
    return entities[ch];
  });
}
/**
 * Build the markup for one EnterpriseOps-Gym runner-up row.
 *
 * @param {{rank: *, model: *, bar: *, score: number}} r  One leaderboard row.
 * @returns {string} HTML for the row (rank, model name, progress bar, score with one decimal).
 * @throws {TypeError} If r.score has no toFixed method (i.e. is not a number).
 *
 * All values come from fetched JSON, so everything placed in the markup is
 * either escaped (rank, model) or reduced to a number (bar, score).
 */
function eogRunnerRow(r) {
  // Coerce and clamp the bar width so it is always a plain 0–100 number inside the style attribute.
  const pct = Number(r.bar);
  const bar = Number.isFinite(pct) ? Math.min(100, Math.max(0, pct)) : 0;
  return `<div class="grid grid-cols-[20px_1fr_100px_45px] items-center gap-3">
<span class="text-on-surface-variant text-xs tabular-nums font-medium">${esc(r.rank)}</span>
<span class="text-on-surface text-sm font-medium">${esc(r.model)}</span>
<div class="h-1.5 w-full bg-infinite-soft rounded-full overflow-hidden">
<div class="h-full fill-infinite rounded-full" style="width: ${bar}%;"></div>
</div>
<span class="text-infinite-blue text-sm font-bold text-right tabular-nums">${r.score.toFixed(1)}</span>
</div>`;
}
/**
 * Build the markup for one EVA-Bench runner-up row.
 *
 * @param {{rank: *, name: *, bar: *, score: number}} r  One leaderboard row.
 * @param {string} fill  Colour key spliced into the `bg-<fill>-soft` and `fill-<fill>`
 *                       class names; the callers in this file pass 'indigo' or 'blue'.
 * @returns {string} HTML for the row (rank, name, progress bar, score with two decimals).
 * @throws {TypeError} If r.score has no toFixed method (i.e. is not a number).
 *
 * All row values come from fetched JSON, so everything placed in the markup is
 * either escaped (rank, name) or reduced to a number (bar, score).
 */
function evaRunnerRow(r, fill) {
  // Coerce and clamp the bar width so it is always a plain 0–100 number inside the style attribute.
  const pct = Number(r.bar);
  const bar = Number.isFinite(pct) ? Math.min(100, Math.max(0, pct)) : 0;
  return `<div class="grid grid-cols-[18px_1fr_80px_35px] items-center gap-2 text-xs">
<span class="text-on-surface-variant tabular-nums font-medium">${esc(r.rank)}</span>
<span class="text-on-surface font-medium truncate">${esc(r.name)}</span>
<div class="h-1 w-full bg-${fill}-soft rounded-full overflow-hidden">
<div class="h-full fill-${fill} rounded-full" style="width: ${bar}%;"></div>
</div>
<span class="text-infinite-blue font-bold text-right tabular-nums">${r.score.toFixed(2)}</span>
</div>`;
}
/**
 * Set the text content of the element with the given id.
 * Does nothing when no such element exists.
 */
function setText(id, value) {
  const target = document.getElementById(id);
  if (!target) {
    return;
  }
  target.textContent = value;
}
/**
 * Overlay the EnterpriseOps-Gym board onto the static card.
 *
 * The first entry of board.rows fills the "Top model" block (model, org, score);
 * the remaining entries are rendered with eogRunnerRow into #eog-runners.
 * A board with no rows (or no rows array) leaves the card untouched.
 *
 * @param {{rows: Array<Object>}} board
 * @throws {TypeError} If a row's score is not a number. Every string is built
 *   before the DOM is written, so in that case the static fallback is still intact.
 */
function renderEog(board) {
  const rows = board && Array.isArray(board.rows) ? board.rows : [];
  const top = rows[0];
  if (!top) return;

  // Format first: anything that can throw on malformed data must run before the first DOM write,
  // otherwise the card would end up showing a mix of fresh and fallback values.
  const topScore = top.score.toFixed(1);
  const runnersHtml = rows.slice(1).map((r) => eogRunnerRow(r)).join('');

  setText('eog-top-name', top.model);
  setText('eog-top-org', top.org);
  const score = document.getElementById('eog-top-score');
  if (score) score.innerHTML = `${topScore}<span class="text-on-surface-variant text-lg font-medium ml-0.5">%</span>`;
  const runners = document.getElementById('eog-runners');
  if (runners) runners.innerHTML = runnersHtml;
}
/**
 * Overlay one EVA-Bench split (accuracy or experience) onto the static card.
 *
 * The first entry of board.rows fills `<prefix>-name`, `<prefix>-sub` and
 * `<prefix>-score`; the remaining entries are rendered with evaRunnerRow into
 * `<prefix>-runners`. A board with no rows (or no rows array) leaves the card untouched.
 *
 * @param {{rows: Array<Object>}} board
 * @param {string} prefix  Element-id prefix; the callers in this file pass 'eva-acc' or 'eva-exp'.
 * @param {string} fill    Colour key forwarded to evaRunnerRow.
 * @throws {TypeError} If a row's score is not a number. Every string is built
 *   before the DOM is written, so in that case the static fallback is still intact.
 */
function renderEvaSplit(board, prefix, fill) {
  const rows = board && Array.isArray(board.rows) ? board.rows : [];
  const top = rows[0];
  if (!top) return;

  // Format first: anything that can throw on malformed data must run before the first DOM write,
  // otherwise the card would end up showing a mix of fresh and fallback values.
  const topScore = top.score.toFixed(2);
  const runnersHtml = rows.slice(1).map((r) => evaRunnerRow(r, fill)).join('');

  setText(`${prefix}-name`, top.name);
  setText(`${prefix}-sub`, top.subtitle);
  setText(`${prefix}-score`, topScore);
  const runners = document.getElementById(`${prefix}-runners`);
  if (runners) runners.innerHTML = runnersHtml;
}
// Load the synced leaderboard and overlay each board that is present in it.
// Boards are rendered independently: a malformed board is logged and skipped
// so it cannot prevent the remaining boards from being updated.
fetch('./data/leaderboard.json', { cache: 'no-cache' })
  .then((res) => {
    if (!res.ok) throw new Error('HTTP ' + res.status);
    return res.json();
  })
  .then((data) => {
    // A non-object payload (e.g. JSON `null`) is treated like a failed load.
    if (!data || typeof data !== 'object') throw new Error('Unexpected leaderboard payload');
    const jobs = [
      ['eog', () => renderEog(data.eog)],
      ['evaAccuracy', () => renderEvaSplit(data.evaAccuracy, 'eva-acc', 'indigo')],
      ['evaExperience', () => renderEvaSplit(data.evaExperience, 'eva-exp', 'blue')],
    ];
    jobs.forEach(([key, render]) => {
      if (!data[key]) return;
      try {
        render();
      } catch (err) {
        console.warn('Leaderboard section "' + key + '" could not be rendered.', err);
      }
    });
  })
  .catch((err) => {
    console.warn('Leaderboard JSON load failed; showing static fallback.', err);
  });
})();
</script>
</body>
</html>