bradnow committed on
Commit
ba5565d
1 Parent(s): bcfd770

Sync leaderboard: EOG + EVA 2026-06-10

Browse files

evaAccuracy:
old: Scribe v2.2 Realtime + GPT-5.4 + Eleven Flash v2 (ElevenAgents)=0.67, Nova 3 + GPT-5.4 + Sonic 3=0.5, GPT Realtime 2=0.5
new: Scribe v2.2 Realtime + GPT-5.4 + Eleven Flash v2 (ElevenAgents)=0.67, Grok Voice Think Fast 1.0=0.59, Nova 3 + GPT-5.4 + Sonic 3=0.5
evaExperience:
old: Scribe v2.2 Realtime + Claude Haiku 4.5 + Eleven Flash v2 (ElevenAgents)=0.82, Gemini 3.1 Flash Live=0.59, GPT Realtime 1.5=0.57
new: Scribe v2.2 Realtime + Claude Haiku 4.5 + Eleven Flash v2 (ElevenAgents)=0.82, Gemini 3.1 Flash Live=0.59, Grok Voice Think Fast 1.0=0.57

Files changed (1) hide show
  1. data/leaderboard.json +17 -17
data/leaderboard.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "updated": "2026-06-03T01:31:40.593Z",
3
  "sources": {
4
  "eog": {
5
  "url": "https://raw.githubusercontent.com/EnterpriseOps-Gym/EnterpriseOps-Gym.github.io/main/script.js",
6
- "hash": "sha256:2b46960ad4be62ef9283b5f2ef831842d71e2d31a0da8a0a451cc4fc3c0dc18c",
7
- "fetchedAt": "2026-06-03T01:31:40.592Z"
8
  },
9
  "eva": {
10
  "url": "https://raw.githubusercontent.com/ServiceNow/eva/main/website/src/data/leaderboardStats.json",
11
- "hash": "sha256:9c0d382f8ada74bda7343d6ee52af35a8dac4e8c8b993b3e1821c26d4ef8ed11",
12
- "fetchedAt": "2026-06-03T01:31:40.593Z"
13
  }
14
  },
15
  "eog": {
@@ -71,20 +71,20 @@
71
  },
72
  {
73
  "rank": 2,
74
- "name": "Nova 3 + GPT-5.4 + Sonic 3",
75
- "subtitle": "Mixed Models Cascade",
76
- "score": 0.5,
77
- "ciLower": 0.46,
78
- "ciUpper": 0.54,
79
- "bar": 50
80
  },
81
  {
82
  "rank": 3,
83
- "name": "GPT Realtime 2",
84
- "subtitle": "OpenAI · Speech-to-Speech",
85
  "score": 0.5,
86
  "ciLower": 0.46,
87
- "ciUpper": 0.55,
88
  "bar": 50
89
  }
90
  ]
@@ -112,11 +112,11 @@
112
  },
113
  {
114
  "rank": 3,
115
- "name": "GPT Realtime 1.5",
116
- "subtitle": "OpenAI · Speech-to-Speech",
117
  "score": 0.57,
118
  "ciLower": 0.53,
119
- "ciUpper": 0.60,
120
  "bar": 57
121
  }
122
  ]
 
1
  {
2
+ "updated": "2026-06-10T18:19:04.013Z",
3
  "sources": {
4
  "eog": {
5
  "url": "https://raw.githubusercontent.com/EnterpriseOps-Gym/EnterpriseOps-Gym.github.io/main/script.js",
6
+ "hash": "sha256:f96e11145e82ffd4bb2654a9e7cf42f5ebd3b10f481aa1027be22a0d3a1badba",
7
+ "fetchedAt": "2026-06-10T18:19:04.012Z"
8
  },
9
  "eva": {
10
  "url": "https://raw.githubusercontent.com/ServiceNow/eva/main/website/src/data/leaderboardStats.json",
11
+ "hash": "sha256:1f7a33021ffea8a3719c24cf31f75cfeff4b0271f77ef8abfd0063099081b288",
12
+ "fetchedAt": "2026-06-10T18:19:04.013Z"
13
  }
14
  },
15
  "eog": {
 
71
  },
72
  {
73
  "rank": 2,
74
+ "name": "Grok Voice Think Fast 1.0",
75
+ "subtitle": "Grok · Speech-to-Speech",
76
+ "score": 0.59,
77
+ "ciLower": 0.54,
78
+ "ciUpper": 0.64,
79
+ "bar": 59
80
  },
81
  {
82
  "rank": 3,
83
+ "name": "Nova 3 + GPT-5.4 + Sonic 3",
84
+ "subtitle": "Mixed Models Cascade",
85
  "score": 0.5,
86
  "ciLower": 0.46,
87
+ "ciUpper": 0.54,
88
  "bar": 50
89
  }
90
  ]
 
112
  },
113
  {
114
  "rank": 3,
115
+ "name": "Grok Voice Think Fast 1.0",
116
+ "subtitle": "Grok · Speech-to-Speech",
117
  "score": 0.57,
118
  "ciLower": 0.53,
119
+ "ciUpper": 0.61,
120
  "bar": 57
121
  }
122
  ]