thinkwee
committed on
Commit
·
bd3e774
1
Parent(s):
00bf757
update title
Browse files
- index.html +2 -2
index.html
CHANGED
|
@@ -205,7 +205,7 @@
|
|
| 205 |
<path d="M3 3v18h18" />
|
| 206 |
<path d="m19 9-5 5-4-4-3 3" />
|
| 207 |
</svg>
|
| 208 |
-
|
| 209 |
</h2>
|
| 210 |
<p>Main benchmark results and in-depth analysis of agent capabilities.</p>
|
| 211 |
</div>
|
|
@@ -357,7 +357,7 @@
|
|
| 357 |
<path d="M14 14.66V17c0 .55.47.98.97 1.21C16.15 18.75 17 20.24 17 22" />
|
| 358 |
<path d="M18 2H6v7a6 6 0 0 0 12 0V2Z" />
|
| 359 |
</svg>
|
| 360 |
-
|
| 361 |
</h2>
|
| 362 |
<p>
|
| 363 |
Novelty (Bradley-Terry) vs Accuracy ranking
|
|
|
|
| 205 |
<path d="M3 3v18h18" />
|
| 206 |
<path d="m19 9-5 5-4-4-3 3" />
|
| 207 |
</svg>
|
| 208 |
+
Experiments
|
| 209 |
</h2>
|
| 210 |
<p>Main benchmark results and in-depth analysis of agent capabilities.</p>
|
| 211 |
</div>
|
|
|
|
| 357 |
<path d="M14 14.66V17c0 .55.47.98.97 1.21C16.15 18.75 17 20.24 17 22" />
|
| 358 |
<path d="M18 2H6v7a6 6 0 0 0 12 0V2Z" />
|
| 359 |
</svg>
|
| 360 |
+
Novelty vs Accuracy
|
| 361 |
</h2>
|
| 362 |
<p>
|
| 363 |
Novelty (Bradley-Terry) vs Accuracy ranking
|