Spaces:
Running
Running
File size: 3,072 Bytes
0121818 ba5565d 0121818 ba5565d 0121818 ba5565d 0121818 30d00f2 0121818 30d00f2 0121818 30d00f2 0121818 30d00f2 0121818 30d00f2 0121818 85f0b3a 0121818 ba5565d 0121818 ba5565d 85f0b3a ba5565d 85f0b3a 0121818 85f0b3a 0121818 85f0b3a 0121818 85f0b3a ba5565d 85f0b3a ba5565d 85f0b3a 0121818 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | {
"updated": "2026-06-10T18:19:04.013Z",
"sources": {
"eog": {
"url": "https://raw.githubusercontent.com/EnterpriseOps-Gym/EnterpriseOps-Gym.github.io/main/script.js",
"hash": "sha256:f96e11145e82ffd4bb2654a9e7cf42f5ebd3b10f481aa1027be22a0d3a1badba",
"fetchedAt": "2026-06-10T18:19:04.012Z"
},
"eva": {
"url": "https://raw.githubusercontent.com/ServiceNow/eva/main/website/src/data/leaderboardStats.json",
"hash": "sha256:1f7a33021ffea8a3719c24cf31f75cfeff4b0271f77ef8abfd0063099081b288",
"fetchedAt": "2026-06-10T18:19:04.013Z"
}
},
"eog": {
"metricLabel": "Task Success Rate 路 Oracle mode",
"rows": [
{
"rank": 1,
"model": "Gemini 3.5 Flash (High)",
"org": "Google",
"type": "closed",
"score": 46,
"bar": 46
},
{
"rank": 2,
"model": "Claude Opus 4.6",
"org": "Anthropic",
"type": "closed",
"score": 44.6,
"bar": 45
},
{
"rank": 3,
"model": "GPT-5.5 (High)",
"org": "OpenAI",
"type": "closed",
"score": 41.2,
"bar": 41
},
{
"rank": 4,
"model": "Claude Opus 4.7 (High)",
"org": "Anthropic",
"type": "closed",
"score": 41.2,
"bar": 41
},
{
"rank": 5,
"model": "Claude Sonnet 4.6",
"org": "Anthropic",
"type": "closed",
"score": 40.4,
"bar": 40
}
]
},
"evaAccuracy": {
"metricLabel": "Pass@1",
"rows": [
{
"rank": 1,
"name": "Scribe v2.2 Realtime + GPT-5.4 + Eleven Flash v2 (ElevenAgents)",
"subtitle": "Mixed Models 路 Cascade",
"score": 0.67,
"ciLower": 0.62,
"ciUpper": 0.72,
"bar": 67
},
{
"rank": 2,
"name": "Grok Voice Think Fast 1.0",
"subtitle": "Grok 路 Speech-to-Speech",
"score": 0.59,
"ciLower": 0.54,
"ciUpper": 0.64,
"bar": 59
},
{
"rank": 3,
"name": "Nova 3 + GPT-5.4 + Sonic 3",
"subtitle": "Mixed Models 路 Cascade",
"score": 0.5,
"ciLower": 0.46,
"ciUpper": 0.54,
"bar": 50
}
]
},
"evaExperience": {
"metricLabel": "Pass@1",
"rows": [
{
"rank": 1,
"name": "Scribe v2.2 Realtime + Claude Haiku 4.5 + Eleven Flash v2 (ElevenAgents)",
"subtitle": "Mixed Models 路 Cascade",
"score": 0.82,
"ciLower": 0.79,
"ciUpper": 0.84,
"bar": 82
},
{
"rank": 2,
"name": "Gemini 3.1 Flash Live",
"subtitle": "Google 路 Speech-to-Speech",
"score": 0.59,
"ciLower": 0.56,
"ciUpper": 0.62,
"bar": 59
},
{
"rank": 3,
"name": "Grok Voice Think Fast 1.0",
"subtitle": "Grok 路 Speech-to-Speech",
"score": 0.57,
"ciLower": 0.53,
"ciUpper": 0.61,
"bar": 57
}
]
}
}
|