<!doctype html>
<html class="light" lang="en">
| <head> | |
| <meta charset="utf-8"/> | |
| <meta content="width=device-width, initial-scale=1.0" name="viewport"/> | |
| <title>NOWAI-Bench | Enterprise AI Benchmarking</title> | |
| <script src="https://cdn.tailwindcss.com?plugins=forms,container-queries"></script> | |
| <link href="https://fonts.googleapis.com/css2?family=Hanken+Grotesk:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet"/> | |
| <link href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&display=swap" rel="stylesheet"/> | |
| <link rel="stylesheet" href="style.css"/> | |
| <script id="tailwind-config"> | |
// Runtime configuration for the Tailwind CDN build loaded earlier in <head>.
// Dark mode is opt-in through a `dark` class rather than the OS preference.
tailwind.config = {
  darkMode: "class",
  theme: {
    // Keys under `extend` are merged into Tailwind's defaults.
    extend: {
      // Design-token palette; each key becomes a utility suffix
      // (e.g. `text-infinite-blue`, `border-border-subtle`, `bg-surface-muted`).
      colors: {
        "surface-muted": "#F4F7F9",
        "tertiary-container": "#402302",
        "error": "#ba1a1a",
        "surface": "#faf9fb",
        "surface-container-highest": "#e3e2e4",
        "on-tertiary-fixed-variant": "#623f1b",
        "error-container": "#ffdad6",
        "surface-dim": "#dadadc",
        "secondary": "#056e00",
        "surface-container-high": "#e8e8ea",
        "border-subtle": "#E1E8ED",
        "secondary-container": "#80fe68",
        "on-tertiary": "#ffffff",
        "surface-tint": "#406279",
        "on-primary-fixed-variant": "#284a61",
        "inverse-primary": "#a8cbe6",
        "infinite-blue": "#032D42",
        "background": "#faf9fb",
        "tertiary-fixed": "#ffdcbe",
        "on-secondary": "#ffffff",
        "tertiary-fixed-dim": "#efbd8e",
        "surface-variant": "#e3e2e4",
        "on-background": "#1a1c1d",
        "surface-container": "#eeedef",
        "primary-container": "#032d42",
        "primary": "#001725",
        "outline": "#72787d",
        "surface-container-lowest": "#ffffff",
        "surface-bright": "#faf9fb",
        "tertiary": "#241100",
        "secondary-fixed-dim": "#64e04f",
        "wasabi-green": "#63DF4E",
        "on-tertiary-fixed": "#2d1600",
        "on-secondary-container": "#057500",
        "outline-variant": "#c2c7cd",
        "on-error": "#ffffff",
        "on-error-container": "#93000a",
        "on-surface": "#1a1c1d",
        "data-neutral": "#64748B",
        "on-primary": "#ffffff",
        "on-secondary-fixed-variant": "#035300",
        "on-tertiary-container": "#b5885d",
        "primary-fixed-dim": "#a8cbe6",
        "on-secondary-fixed": "#012200",
        "on-primary-fixed": "#001e2e",
        "on-primary-container": "#7395ae",
        "surface-container-low": "#f4f3f5",
        "secondary-fixed": "#80fe68",
        "inverse-on-surface": "#f1f0f2",
        "inverse-surface": "#2f3032",
        "primary-fixed": "#c8e6ff",
        "on-surface-variant": "#42474c",
        "bright-blue": "#52B8FF",
        "bright-indigo": "#7661FF"
      },
      // Tighter corner radii than Tailwind's defaults. Note that `full` is
      // redefined to 0.75rem, so `rounded-full` is a fixed radius here rather
      // than Tailwind's default pill value.
      borderRadius: {
        "DEFAULT": "0.125rem",
        "lg": "0.25rem",
        "xl": "0.5rem",
        "full": "0.75rem"
      }
    },
    // Declared directly under `theme` (not `extend`), so per Tailwind's theme
    // rules this replaces the default font-family map: only `font-sans` and
    // `font-mono` are defined by this config.
    // NOTE(review): the Google Fonts links in <head> load Hanken Grotesk and
    // JetBrains Mono only; the ServiceNow faces presumably come from style.css
    // or locally installed fonts — confirm.
    fontFamily: {
      sans: ['ServiceNow Sans', 'Hanken Grotesk', 'sans-serif'],
      mono: ['ServiceNow Sans Mono', 'JetBrains Mono', 'monospace']
    }
  }
};
| </script> | |
| </head> | |
| <body class="font-sans"> | |
<!-- Decorative 3×3 tile watermark: fixed, behind the content (-z-10), non-interactive, and hidden from assistive technology. -->
<div class="fixed top-0 right-0 -z-10 pointer-events-none opacity-[0.07] overflow-hidden w-[800px] h-[960px] watermark-container" style="transform: translate(25%, -10%) rotate(-15deg) scale(0.8);" aria-hidden="true">
  <svg class="w-full h-full" viewBox="0 0 100 100" xmlns="http://www.w3.org/2000/svg">
    <!-- Row 1 -->
    <rect class="wave-1" fill="#032D42" height="22" rx="3" width="22" x="10" y="10"></rect>
    <rect class="wave-2" fill="#7661FF" height="22" rx="3" width="22" x="39" y="10"></rect>
    <rect class="wave-3" fill="#63DF4E" height="22" rx="3" width="22" x="68" y="10"></rect>
    <!-- Row 2 -->
    <rect class="wave-2" fill="#52B8FF" height="22" rx="3" width="22" x="10" y="39"></rect>
    <rect class="wave-3" fill="#032D42" fill-opacity="0.6" height="22" rx="3" width="22" x="39" y="39"></rect>
    <rect class="wave-4" fill="#032D42" height="22" rx="3" width="22" x="68" y="39"></rect>
    <!-- Row 3 -->
    <rect class="wave-3" fill="#63DF4E" fill-opacity="0.4" height="22" rx="3" width="22" x="10" y="68"></rect>
    <rect class="wave-4" fill="#7661FF" fill-opacity="0.8" height="22" rx="3" width="22" x="39" y="68"></rect>
    <rect class="wave-5" fill="#032D42" height="22" rx="3" width="22" x="68" y="68"></rect>
  </svg>
</div>
| <div class="max-w-[1280px] mx-auto px-6 md:px-8 pb-16"> | |
| <!-- Top navigation --> | |
| <nav class="flex items-center justify-between py-6 border-b border-border-subtle mb-10"> | |
| <div class="flex items-center gap-4"> | |
| <span class="flex items-center"> | |
| <svg aria-label="ServiceNow" height="20" viewbox="0 0 132 20" width="132" xmlns="http://www.w3.org/2000/svg"><path d="M32.022,6.391a5.626,5.626,0,0,0-3.61,1.3V6.523H25.119V19.35h3.425v-8.2a4.073,4.073,0,0,1,3.109-1.588,3.494,3.494,0,0,1,1.374.206V6.479a5.851,5.851,0,0,0-1.005-.088" fill="#032D42" fill-rule="evenodd"></path><path d="M2.16,15.436a5.369,5.369,0,0,0,3.5,1.269c.922,0,1.633-.45,1.633-1.084,0-1.931-6.191-1.243-6.191-5.369,0-2.459,2.371-3.993,4.9-3.993a7.9,7.9,0,0,1,4.32,1.3L8.72,10.04a4.3,4.3,0,0,0-2.45-.872c-.948,0-1.739.37-1.739,1.031,0,1.666,6.192,1.005,6.192,5.448,0,2.46-2.4,3.967-5.085,3.967A8.815,8.815,0,0,1,.5,17.9Z" fill="#032D42" fill-rule="evenodd"></path><path d="M23.657,12.817c0-3.57-2.5-6.558-6.034-6.558-3.794,0-6.218,3.12-6.218,6.691A6.4,6.4,0,0,0,18.1,19.614a6.919,6.919,0,0,0,5.243-2.3l-1.95-1.957a4.515,4.515,0,0,1-3.214,1.481A3.36,3.36,0,0,1,14.725,13.8h8.853A5.735,5.735,0,0,0,23.657,12.817Zm-8.774-1.533a2.775,2.775,0,0,1,2.74-2.248,2.552,2.552,0,0,1,2.53,2.248Z" fill="#032D42" fill-rule="evenodd"></path><polygon fill="#032D42" fill-rule="evenodd" points="41.19 14.351 44.694 6.523 48.252 6.523 42.376 19.35 40.005 19.35 34.129 6.523 37.686 6.523 41.19 14.351"></polygon><path d="M51.128.5A2.2,2.2,0,1,1,48.888,2.7,2.2,2.2,0,0,1,51.128.5" fill="#032D42" fill-rule="evenodd"></path><rect fill="#032D42" height="12.827" width="3.425" x="49.415" y="6.523"></rect><path d="M67,16.731a6.766,6.766,0,0,1-5.8,2.883,6.68,6.68,0,1,1,.026-13.355,6.808,6.808,0,0,1,5.375,2.565l-2.424,2.142a3.7,3.7,0,0,0-2.951-1.534A3.433,3.433,0,0,0,57.78,12.95a3.383,3.383,0,0,0,3.531,3.49,3.741,3.741,0,0,0,3.056-1.692Z" fill="#032D42" fill-rule="evenodd"></path><path d="M79.442,17.313a6.918,6.918,0,0,1-5.243,2.3,6.4,6.4,0,0,1-6.692-6.664c0-3.571,2.424-6.691,6.218-6.691,3.53,0,6.033,2.988,6.033,6.558a5.635,5.635,0,0,1-.079.979H70.826a3.36,3.36,0,0,0,3.452,3.041,4.52,4.52,0,0,0,3.215-1.481Zm-3.188-6.029a2.551,2.551,0,0,0-2.529-2.248,2.774,2.774,0,0,0-2.74,2.248Z" 
fill="#032D42" fill-rule="evenodd"></path><path d="M81.12,19.35V6.523h3.293V7.554a5.625,5.625,0,0,1,3.609-1.295,5.747,5.747,0,0,1,4.427,2.063,6.482,6.482,0,0,1,1.317,4.5V19.35H90.341v-6.8a3.11,3.11,0,0,0-.764-2.407,2.69,2.69,0,0,0-1.923-.714,4.076,4.076,0,0,0-3.109,1.587V19.35Z" fill="#032D42"></path><path d="M102.586,6.259A7.5,7.5,0,0,0,97.419,19.21a1.481,1.481,0,0,0,1.926.1,5.355,5.355,0,0,1,6.394,0,1.485,1.485,0,0,0,1.937-.113,7.5,7.5,0,0,0-5.09-12.94M102.542,17.5a3.637,3.637,0,0,1-3.734-3.733,3.734,3.734,0,1,1,7.468,0,3.637,3.637,0,0,1-3.734,3.733" fill="#62d84e" fill-rule="evenodd"></path><polygon fill="#032D42" fill-rule="evenodd" points="116.788 19.35 114.237 19.35 109.15 6.523 112.57 6.523 115.359 13.853 118.094 6.523 120.952 6.523 123.662 13.853 126.475 6.523 129.896 6.523 124.809 19.35 122.258 19.35 119.523 12.046 116.788 19.35"></polygon><path d="M129.935,17.9h-.289v.543H129.4V16.957h.6a.476.476,0,0,1,.478.478.469.469,0,0,1-.295.432l.338.579h-.268Zm-.289-.219H130a.249.249,0,0,0,0-.5h-.351Z" fill="#032D42"></path><path d="M129.9,16.348a1.379,1.379,0,1,1-1.378,1.379,1.381,1.381,0,0,1,1.378-1.379m0-.218a1.6,1.6,0,1,0,1.6,1.6,1.6,1.6,0,0,0-1.6-1.6Z" fill="#032D42"></path></svg> | |
| </span> | |
| <span class="w-[1px] h-6 bg-border-subtle"></span> | |
| <span class="flex items-center gap-2"> | |
<!-- Decorative mark: the adjacent text already names the product, so the alt is empty; width/height reserve the 24px box that w-6/h-6 render. -->
<img class="w-6 h-6" src="assets/nowai-bench-logo.svg" alt="" width="24" height="24">
| <span class="text-infinite-blue font-bold tracking-tight text-[15px]">NOWAI-Bench</span> | |
| </span> | |
| </div> | |
| </nav> | |
| <!-- Hero section --> | |
| <header class="mb-12"> | |
| <h1 class="text-infinite-blue font-bold text-5xl md:text-6xl tracking-tighter mb-4 leading-none">NOWAI-Bench</h1> | |
| <p class="text-on-surface-variant text-lg md:text-xl font-light max-w-2xl">An open benchmarking suite for enterprise AI agents.</p> | |
| </header> | |
| <!-- Compact stat strip --> | |
| <div class="grid grid-cols-2 sm:flex sm:gap-16 py-8 border-y border-border-subtle mb-16"> | |
| <div> | |
| <div class="text-infinite-blue font-bold text-4xl tabular-nums leading-none">2</div> | |
| <div class="font-mono text-[11px] text-on-surface-variant uppercase tracking-[0.12em] mt-3">Benchmarks</div> | |
| </div> | |
| <div> | |
| <div class="text-infinite-blue font-bold text-4xl tabular-nums leading-none flex items-baseline"> | |
| 2026<span class="text-on-surface-variant text-base font-medium ml-1">Q2</span> | |
| </div> | |
| <div class="font-mono text-[11px] text-on-surface-variant uppercase tracking-[0.12em] mt-3">Last updated</div> | |
| </div> | |
| </div> | |
| <!-- Overview section --> | |
| <section class="mb-20"> | |
| <h2 class="text-infinite-blue font-bold text-3xl tracking-tight mb-8">Overview</h2> | |
| <div class="prose max-w-3xl"> | |
| <p class="text-infinite-blue text-[16px] leading-[1.7] mb-5"> | |
| NOWAI-Bench is a coordinated, multi-benchmark effort by ServiceNow to measure whether AI agents perform reliably across the workflows, modalities, and governance demands of real enterprises. Rather than a single test, it is an expanding portfolio of benchmarks—each targeting a distinct slice of what enterprise agents are asked to do. | |
| </p> | |
| <p class="text-infinite-blue text-[16px] leading-[1.7] mb-5"> | |
| The current release covers two slices: <strong class="font-bold">EnterpriseOps-Gym</strong> evaluates long-horizon task agents across eight enterprise domains, and <strong class="font-bold">EVA-Bench</strong> evaluates voice agents on both task accuracy and conversational experience. Together they span text-based multi-step workflow execution and governed voice interaction—two of the most common deployment patterns for enterprise agents today. | |
| </p> | |
| <p class="text-infinite-blue text-[16px] leading-[1.7]"> | |
| This document describes the currently released benchmarks and how to read their results. It is intended to stay live: as new benchmarks land, the README is updated to reflect them. | |
| </p> | |
| </div> | |
| </section> | |
<!-- Benchmark leaderboard header -->
| <div class="flex flex-col md:flex-row md:items-end md:justify-between gap-6 mb-8"> | |
| <div> | |
| <h2 class="text-infinite-blue font-bold text-3xl tracking-tight mb-2">Benchmark Leaderboard</h2> | |
| <p class="text-on-surface-variant font-light text-[15px]">A high-level overview of each NOWAI-Bench benchmark.</p> | |
| </div> | |
| <div class="md:text-right"> | |
| <span class="inline-block bg-infinite-blue text-white font-mono text-[11px] px-3 py-1.5 rounded uppercase tracking-wider">v1.0</span> | |
| </div> | |
| </div> | |
| <!-- Benchmark cards --> | |
| <div class="grid grid-cols-1 md:grid-cols-2 gap-8"> | |
| <!-- EnterpriseOps-Gym --> | |
| <div class="bg-white border border-border-subtle rounded-2xl p-8 card-shadow border-l-[4px] accent-infinite flex flex-col"> | |
| <div class="flex items-center gap-3 mb-8"> | |
| <span class="w-3 h-3 rounded-full fill-infinite"></span> | |
| <h3 class="text-2xl tracking-tight"><a class="title-anim font-bold" href="https://enterpriseops-gym.github.io/" target="_blank" rel="noopener">EnterpriseOps-Gym</a></h3> | |
| <span class="tooltip"> | |
| <button type="button" class="tooltip-trigger" aria-label="About EnterpriseOps-Gym" aria-describedby="tt-eog"> | |
| <span class="material-symbols-outlined" style="font-size:18px" aria-hidden="true">info</span> | |
| </button> | |
| <span class="tooltip-bubble" role="tooltip" id="tt-eog">Long-horizon task agents evaluated across eight enterprise domains.</span> | |
| </span> | |
| <span class="text-on-surface-variant text-xs font-medium bg-surface-muted px-2 py-0.5 rounded">long-horizon task agents</span> | |
| </div> | |
| <div class="mb-6"> | |
| <div class="font-mono text-[10px] text-on-surface-variant uppercase tracking-[0.15em] mb-2">Top model</div> | |
| <div id="eog-top-name" class="text-infinite-blue font-bold text-xl leading-tight">Claude Opus 4.5</div> | |
| <div id="eog-top-org" class="text-on-surface-variant text-sm font-medium">Anthropic</div> | |
| </div> | |
| <div class="bg-[#f0f9f3] border-l-4 border-wasabi-green rounded-xl p-4 flex items-center gap-3 mb-8"> | |
| <span id="eog-top-score" class="text-infinite-blue font-bold text-3xl tabular-nums leading-none">37.4<span class="text-on-surface-variant text-lg font-medium ml-0.5">%</span></span> | |
| <span class="font-mono text-[10px] text-on-surface-variant uppercase tracking-wider font-medium">Task Success Rate · Oracle mode</span> | |
| <span class="tooltip"> | |
| <button type="button" class="tooltip-trigger" aria-label="Task Success Rate definition" aria-describedby="tt-eog-metric"> | |
| <span class="material-symbols-outlined" style="font-size:16px" aria-hidden="true">info</span> | |
| </button> | |
| <span class="tooltip-bubble" role="tooltip" id="tt-eog-metric">A task passes only if all verification conditions are met.</span> | |
| </span> | |
| </div> | |
| <div id="eog-runners" class="space-y-4 pb-6"> | |
| <!-- Runner 2 --> | |
| <div class="grid grid-cols-[20px_1fr_100px_45px] items-center gap-3"> | |
| <span class="text-on-surface-variant text-xs tabular-nums font-medium">2</span> | |
| <span class="text-on-surface text-sm font-medium">GPT-5.4</span> | |
| <div class="h-1.5 w-full bg-infinite-soft rounded-full overflow-hidden"> | |
| <div class="h-full fill-infinite rounded-full" style="width: 93%;"></div> | |
| </div> | |
| <span class="text-infinite-blue text-sm font-bold text-right tabular-nums">34.8</span> | |
| </div> | |
| <!-- Runner 3 --> | |
| <div class="grid grid-cols-[20px_1fr_100px_45px] items-center gap-3"> | |
| <span class="text-on-surface-variant text-xs tabular-nums font-medium">3</span> | |
| <span class="text-on-surface text-sm font-medium">Gemini 3 Pro</span> | |
| <div class="h-1.5 w-full bg-infinite-soft rounded-full overflow-hidden"> | |
| <div class="h-full fill-infinite rounded-full" style="width: 83%;"></div> | |
| </div> | |
| <span class="text-infinite-blue text-sm font-bold text-right tabular-nums">31.2</span> | |
| </div> | |
| <!-- Runner 4 --> | |
| <div class="grid grid-cols-[20px_1fr_100px_45px] items-center gap-3"> | |
| <span class="text-on-surface-variant text-xs tabular-nums font-medium">4</span> | |
| <span class="text-on-surface text-sm font-medium">Claude Sonnet 4.6</span> | |
| <div class="h-1.5 w-full bg-infinite-soft rounded-full overflow-hidden"> | |
| <div class="h-full fill-infinite rounded-full" style="width: 76%;"></div> | |
| </div> | |
| <span class="text-infinite-blue text-sm font-bold text-right tabular-nums">28.6</span> | |
| </div> | |
| <!-- Runner 5 --> | |
| <div class="grid grid-cols-[20px_1fr_100px_45px] items-center gap-3"> | |
| <span class="text-on-surface-variant text-xs tabular-nums font-medium">5</span> | |
| <span class="text-on-surface text-sm font-medium">Model Five (placeholder)</span> | |
| <div class="h-1.5 w-full bg-infinite-soft rounded-full overflow-hidden"> | |
| <div class="h-full fill-infinite rounded-full" style="width: 70%;"></div> | |
| </div> | |
| <span class="text-infinite-blue text-sm font-bold text-right tabular-nums">26.0</span> | |
| </div> | |
| </div> | |
| <div class="mt-auto pt-6 border-t border-border-subtle"> | |
<!-- Outbound link to the full EnterpriseOps-Gym leaderboard. The arrow glyph is decorative, so it is hidden from assistive technology; the new-tab hint is visually hidden. -->
<a class="text-infinite-blue font-bold text-sm hover:text-[#006DAA] transition-colors flex items-center gap-2 group" href="https://enterpriseops-gym.github.io/#leaderboard" target="_blank" rel="noopener">
  View full EOG leaderboard
  <span class="sr-only">(opens in a new tab)</span>
  <span class="material-symbols-outlined text-sm transition-transform group-hover:translate-x-0.5 group-hover:-translate-y-0.5" aria-hidden="true">arrow_outward</span>
</a>
| </div> | |
| </div> | |
| <!-- EVA-Bench --> | |
| <div class="bg-white border border-border-subtle rounded-2xl p-8 card-shadow border-l-[4px] accent-indigo flex flex-col"> | |
| <div class="flex items-center gap-3 mb-8"> | |
| <span class="w-3 h-3 rounded-full fill-indigo"></span> | |
| <h3 class="text-2xl tracking-tight"><a class="title-anim font-bold" href="https://servicenow.github.io/eva/" target="_blank" rel="noopener">EVA-Bench</a></h3> | |
| <span class="tooltip"> | |
| <button type="button" class="tooltip-trigger" aria-label="About EVA-Bench" aria-describedby="tt-eva"> | |
| <span class="material-symbols-outlined" style="font-size:18px" aria-hidden="true">info</span> | |
| </button> | |
| <span class="tooltip-bubble" role="tooltip" id="tt-eva">Voice agents evaluated on task accuracy and conversational experience.</span> | |
| </span> | |
| <span class="text-on-surface-variant text-xs font-medium bg-surface-muted px-2 py-0.5 rounded">voice agents · accuracy and experience</span> | |
| </div> | |
| <div class="space-y-6 pb-6"> | |
| <!-- Accuracy Split --> | |
| <div> | |
| <div class="flex justify-between items-baseline mb-3"> | |
| <span class="font-mono text-[10px] text-[#5240CC] uppercase tracking-[0.15em] font-bold">EVA-Accuracy</span> | |
| <span class="flex items-center gap-1"> | |
| <span class="font-mono text-[10px] text-on-surface-variant uppercase tracking-wider">Pass@1</span> | |
| <span class="tooltip"> | |
| <button type="button" class="tooltip-trigger" aria-label="EVA-Accuracy definition" aria-describedby="tt-eva-acc"> | |
| <span class="material-symbols-outlined" style="font-size:15px" aria-hidden="true">info</span> | |
| </button> | |
| <span class="tooltip-bubble" role="tooltip" id="tt-eva-acc">Scores for accuracy. All values normalized to 0–1 (higher is better). 95% bootstrap confidence intervals shown for each value.</span> | |
| </span> | |
| </span> | |
| </div> | |
| <div class="bg-[#f2f0ff] border-l-4 border-bright-indigo rounded-lg p-3 flex justify-between items-center mb-3"> | |
| <div> | |
| <div id="eva-acc-name" class="text-infinite-blue font-bold text-base leading-tight">Nova + GPT-5.4 + Sonic</div> | |
| <div id="eva-acc-sub" class="text-on-surface-variant text-[11px] font-medium">Mixed Models · Cascade</div> | |
| </div> | |
| <span id="eva-acc-score" class="text-infinite-blue font-bold text-2xl tabular-nums">0.41</span> | |
| </div> | |
| <div id="eva-acc-runners" class="space-y-2.5"> | |
| <div class="grid grid-cols-[18px_1fr_80px_35px] items-center gap-2 text-xs"> | |
| <span class="text-on-surface-variant tabular-nums font-medium">2</span> | |
| <span class="text-on-surface font-medium truncate">Claude Opus 4.5</span> | |
| <div class="h-1 w-full bg-indigo-soft rounded-full overflow-hidden"> | |
| <div class="h-full fill-indigo rounded-full" style="width: 78%;"></div> | |
| </div> | |
| <span class="text-infinite-blue font-bold text-right tabular-nums">0.32</span> | |
| </div> | |
| <div class="grid grid-cols-[18px_1fr_80px_35px] items-center gap-2 text-xs"> | |
| <span class="text-on-surface-variant tabular-nums font-medium">3</span> | |
| <span class="text-on-surface font-medium truncate">Scribe+Gemini-3-Flash</span> | |
| <div class="h-1 w-full bg-indigo-soft rounded-full overflow-hidden"> | |
| <div class="h-full fill-indigo rounded-full" style="width: 76%;"></div> | |
| </div> | |
| <span class="text-infinite-blue font-bold text-right tabular-nums">0.31</span> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Experience Split --> | |
| <div class="pt-6 border-t border-dashed border-border-subtle"> | |
| <div class="flex justify-between items-baseline mb-3"> | |
| <span class="font-mono text-[10px] text-[#006DAA] uppercase tracking-[0.15em] font-bold">EVA-Experience</span> | |
| <span class="flex items-center gap-1"> | |
| <span class="font-mono text-[10px] text-on-surface-variant uppercase tracking-wider">Pass@1</span> | |
| <span class="tooltip"> | |
| <button type="button" class="tooltip-trigger" aria-label="EVA-Experience definition" aria-describedby="tt-eva-exp"> | |
| <span class="material-symbols-outlined" style="font-size:15px" aria-hidden="true">info</span> | |
| </button> | |
| <span class="tooltip-bubble" role="tooltip" id="tt-eva-exp">Scores for conversational experience. All values normalized to 0–1 (higher is better). 95% bootstrap confidence intervals shown for each value.</span> | |
| </span> | |
| </span> | |
| </div> | |
| <div class="bg-[#f0f9ff] border-l-4 border-bright-blue rounded-lg p-3 flex justify-between items-center mb-3"> | |
| <div> | |
| <div id="eva-exp-name" class="text-infinite-blue font-bold text-base leading-tight">Gemini Live</div> | |
| <div id="eva-exp-sub" class="text-on-surface-variant text-[11px] font-medium">Google · Speech-to-Speech</div> | |
| </div> | |
| <span id="eva-exp-score" class="text-infinite-blue font-bold text-2xl tabular-nums">0.49</span> | |
| </div> | |
| <div id="eva-exp-runners" class="space-y-2.5"> | |
| <div class="grid grid-cols-[18px_1fr_80px_35px] items-center gap-2 text-xs"> | |
| <span class="text-on-surface-variant tabular-nums font-medium">2</span> | |
| <span class="text-on-surface font-medium truncate">GPT-Realtime</span> | |
| <div class="h-1 w-full bg-blue-soft rounded-full overflow-hidden"> | |
| <div class="h-full fill-blue rounded-full" style="width: 96%;"></div> | |
| </div> | |
| <span class="text-infinite-blue font-bold text-right tabular-nums">0.47</span> | |
| </div> | |
| <div class="grid grid-cols-[18px_1fr_80px_35px] items-center gap-2 text-xs"> | |
| <span class="text-on-surface-variant tabular-nums font-medium">3</span> | |
| <span class="text-on-surface font-medium truncate">Whisper+Qwen 3.5</span> | |
| <div class="h-1 w-full bg-blue-soft rounded-full overflow-hidden"> | |
| <div class="h-full fill-blue rounded-full" style="width: 88%;"></div> | |
| </div> | |
| <span class="text-infinite-blue font-bold text-right tabular-nums">0.43</span> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="mt-auto pt-6 border-t border-border-subtle"> | |
<!-- Outbound link to the full EVA-Bench results. The arrow glyph is decorative, so it is hidden from assistive technology; the new-tab hint is visually hidden. -->
<a class="text-infinite-blue font-bold text-sm hover:text-[#006DAA] transition-colors flex items-center gap-2 group" href="https://servicenow.github.io/eva/#results" target="_blank" rel="noopener">
  View full EVA leaderboard
  <span class="sr-only">(opens in a new tab)</span>
  <span class="material-symbols-outlined text-sm transition-transform group-hover:translate-x-0.5 group-hover:-translate-y-0.5" aria-hidden="true">arrow_outward</span>
</a>
| </div> | |
| </div> | |
| </div> | |
| <!-- Footer --> | |
| <footer class="mt-24 pt-8 border-t border-border-subtle text-center"> | |
| <p class="text-on-surface-variant text-[11px] font-light uppercase tracking-widest"> | |
| © 2026 ServiceNow, Inc. All rights reserved. | |
| </p> | |
| </footer> | |
| </div> | |
| <script> | |
| // Overlay synced leaderboard data on the static fallback markup. | |
| // On any failure, the static markup already in the DOM remains. | |
| (function () { | |
/**
 * Escape a value for interpolation into HTML text or a quoted attribute.
 *
 * The value is coerced with String(), then each of & < > " ' is replaced by
 * its character reference so the result cannot open a tag or end an attribute.
 *
 * @param {*} s Value to escape.
 * @returns {string} The escaped string.
 */
function esc(s) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  return String(s).replace(/[&<>"']/g, (c) => entities[c]);
}
/**
 * Build the markup for one EnterpriseOps-Gym runner-up row.
 *
 * The row is a four-column grid: rank, model name, a proportional bar whose
 * width is `r.bar` percent, and the score printed with one decimal place.
 *
 * @param {{rank: *, model: *, bar: *, score: number}} r Row record.
 * @returns {string} HTML for the row.
 * @throws {TypeError} If `r.score` has no `toFixed` method.
 */
function eogRunnerRow(r) {
  // The caller assigns this string to innerHTML, so every interpolated field
  // (not only the model name) is passed through esc().
  return `<div class="grid grid-cols-[20px_1fr_100px_45px] items-center gap-3">
  <span class="text-on-surface-variant text-xs tabular-nums font-medium">${esc(r.rank)}</span>
  <span class="text-on-surface text-sm font-medium">${esc(r.model)}</span>
  <div class="h-1.5 w-full bg-infinite-soft rounded-full overflow-hidden">
    <div class="h-full fill-infinite rounded-full" style="width: ${esc(r.bar)}%;"></div>
  </div>
  <span class="text-infinite-blue text-sm font-bold text-right tabular-nums">${r.score.toFixed(1)}</span>
</div>`;
}
/**
 * Build the markup for one EVA-Bench runner-up row.
 *
 * The row is a four-column grid: rank, system name (truncated by CSS), a
 * proportional bar whose width is `r.bar` percent, and the score printed with
 * two decimal places.
 *
 * @param {{rank: *, name: *, bar: *, score: number}} r Row record.
 * @param {string} fill Colour family used to form the `bg-<fill>-soft` and
 *   `fill-<fill>` class names; the callers in this script pass 'indigo' or 'blue'.
 * @returns {string} HTML for the row.
 * @throws {TypeError} If `r.score` has no `toFixed` method.
 */
function evaRunnerRow(r, fill) {
  // The caller assigns this string to innerHTML, so every data-derived field
  // (not only the name) is passed through esc().
  return `<div class="grid grid-cols-[18px_1fr_80px_35px] items-center gap-2 text-xs">
  <span class="text-on-surface-variant tabular-nums font-medium">${esc(r.rank)}</span>
  <span class="text-on-surface font-medium truncate">${esc(r.name)}</span>
  <div class="h-1 w-full bg-${fill}-soft rounded-full overflow-hidden">
    <div class="h-full fill-${fill} rounded-full" style="width: ${esc(r.bar)}%;"></div>
  </div>
  <span class="text-infinite-blue font-bold text-right tabular-nums">${r.score.toFixed(2)}</span>
</div>`;
}
/**
 * Set the text content of the element with the given id, if it exists.
 *
 * A missing element is ignored so that markup changes cannot break the
 * overlay. The value is written through `textContent`, so it is never parsed
 * as HTML.
 *
 * @param {string} id Element id to look up.
 * @param {*} value Value to assign to `textContent`.
 * @returns {void}
 */
function setText(id, value) {
  const el = document.getElementById(id);
  if (el) el.textContent = value;
}
/**
 * Overlay EnterpriseOps-Gym data onto the static card.
 *
 * The first row fills the "top model" slots (name, organisation, score); the
 * remaining rows replace the runner-up list. Does nothing when there is no
 * first row.
 *
 * @param {{rows: Array<{model: *, org: *, score: number}>}} board Board data.
 * @returns {void}
 * @throws {TypeError} If `board.rows` is missing or a score is not a number;
 *   in that case no DOM node has been modified.
 */
function renderEog(board) {
  const top = board.rows[0];
  if (!top) return;

  // Format everything before touching the DOM: toFixed() throws on a
  // non-numeric score, and throwing here leaves the static fallback card
  // fully intact instead of half-updated.
  const topScore = top.score.toFixed(1);
  const runnerHtml = board.rows.slice(1).map(eogRunnerRow).join('');

  setText('eog-top-name', top.model);
  setText('eog-top-org', top.org);
  const score = document.getElementById('eog-top-score');
  if (score) score.innerHTML = `${topScore}<span class="text-on-surface-variant text-lg font-medium ml-0.5">%</span>`;
  const runners = document.getElementById('eog-runners');
  if (runners) runners.innerHTML = runnerHtml;
}
/**
 * Overlay one EVA-Bench split (accuracy or experience) onto its static block.
 *
 * The first row fills `<prefix>-name`, `<prefix>-sub` and `<prefix>-score`;
 * the remaining rows replace the contents of `<prefix>-runners`. Does nothing
 * when there is no first row.
 *
 * @param {{rows: Array<{name: *, subtitle: *, score: number}>}} board Split data.
 * @param {string} prefix Id prefix of the target elements (e.g. 'eva-acc').
 * @param {string} fill Colour family forwarded to evaRunnerRow.
 * @returns {void}
 * @throws {TypeError} If `board.rows` is missing or a score is not a number;
 *   in that case no DOM node has been modified.
 */
function renderEvaSplit(board, prefix, fill) {
  const top = board.rows[0];
  if (!top) return;

  // Format everything before touching the DOM so that a malformed record
  // leaves the static fallback block fully intact instead of half-updated.
  const topScore = top.score.toFixed(2);
  const runnerHtml = board.rows.slice(1).map((r) => evaRunnerRow(r, fill)).join('');

  setText(`${prefix}-name`, top.name);
  setText(`${prefix}-sub`, top.subtitle);
  setText(`${prefix}-score`, topScore);
  const runners = document.getElementById(`${prefix}-runners`);
  if (runners) runners.innerHTML = runnerHtml;
}
// Fetch the synced leaderboard JSON and overlay each board that is present.
// `cache: 'no-cache'` makes the browser revalidate with the server before
// reusing a cached copy.
fetch('./data/leaderboard.json', { cache: 'no-cache' })
  .then((res) => {
    if (!res.ok) throw new Error('HTTP ' + res.status);
    return res.json();
  })
  .then((data) => {
    const boards = [
      ['eog', () => renderEog(data.eog)],
      ['evaAccuracy', () => renderEvaSplit(data.evaAccuracy, 'eva-acc', 'indigo')],
      ['evaExperience', () => renderEvaSplit(data.evaExperience, 'eva-exp', 'blue')],
    ];
    for (const [key, render] of boards) {
      if (!data[key]) continue;
      // Render each board in isolation so that one malformed board does not
      // prevent the remaining boards from being updated.
      try {
        render();
      } catch (err) {
        console.warn('Leaderboard board "' + key + '" failed to render; showing static fallback.', err);
      }
    }
  })
  .catch((err) => {
    // Network, HTTP, or JSON-parse failure: the static markup stays as is.
    console.warn('Leaderboard JSON load failed; showing static fallback.', err);
  });
| })(); | |
| </script> | |
| </body> | |
| </html> | |