Update README.md
Browse files
README.md
CHANGED
|
@@ -1,18 +1,19 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: README
|
| 3 |
-
emoji: 👀
|
| 4 |
-
colorFrom: gray
|
| 5 |
-
colorTo: yellow
|
| 6 |
-
sdk: static
|
| 7 |
-
pinned: false
|
| 8 |
-
thumbnail: >-
|
| 9 |
-
https://cdn-uploads.huggingface.co/production/uploads/65b9463ac9aa94b1ca85414b/kCAsXut29_mLiA69uBzyh.jpeg
|
| 10 |
-
|
| 11 |
-
|
| 12 |
|
| 13 |
<div style="max-width:980px;margin:0 auto;padding:24px 16px;font:16px/1.55 system-ui,-apple-system,Segoe UI,Roboto;">
|
|
|
|
| 14 |
<header style="display:flex;gap:16px;align-items:center;margin-bottom:12px;">
|
| 15 |
-
<img src="
|
| 16 |
<div>
|
| 17 |
<h1 style="margin:0;font-size:26px">Foaster.ai</h1>
|
| 18 |
<p style="margin:4px 0 0;color:#64748b">Reshaping businesses for the agentic era.</p>
|
|
@@ -22,30 +23,19 @@ thumbnail: >-
|
|
| 22 |
<p><strong>Foaster.ai</strong> is a French start-up focused on the agentic era. At <em>Foaster Labs</em>, our Werewolf Benchmark studies how LLMs behave under social pressure—leadership, bluffing, and resistance to manipulation.</p>
|
| 23 |
|
| 24 |
<div style="display:flex;align-items:center;gap:10px;margin:14px 0 22px;">
|
| 25 |
-
<a href="https://huggingface.co/spaces/Foaster-ai/werewolf-leaderboard"
|
|
|
|
|
|
|
|
|
|
| 26 |
</div>
|
| 27 |
|
| 28 |
<h3 style="margin:0 0 8px;">Results — Podium (role-conditioned Elo)</h3>
|
| 29 |
<p style="margin:0 0 10px;color:#64748b">ELO-W = wolf (manipulation power) · ELO-V = villager (manipulation resistance)</p>
|
| 30 |
|
| 31 |
-
<
|
| 32 |
-
<
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
<th style="padding:10px;text-align:center">ELO</th>
|
| 38 |
-
<th style="padding:10px;text-align:center">ELO-W</th>
|
| 39 |
-
<th style="padding:10px;text-align:center">ELO-V</th>
|
| 40 |
-
<th style="padding:10px;text-align:center">Win rate</th>
|
| 41 |
-
<th style="padding:10px;text-align:center">Matches</th>
|
| 42 |
-
</tr>
|
| 43 |
-
</thead>
|
| 44 |
-
<tbody>
|
| 45 |
-
<tr><td style="padding:10px">🥇 #1</td><td style="padding:10px">GPT-5 (OpenAI)</td><td style="padding:10px;text-align:center">1492</td><td style="padding:10px;text-align:center">1508</td><td style="padding:10px;text-align:center">1476</td><td style="padding:10px;text-align:center">96.7%</td><td style="padding:10px;text-align:center">60</td></tr>
|
| 46 |
-
<tr><td style="padding:10px">🥈 #2</td><td style="padding:10px">Gemini 2.5 Pro (Google)</td><td style="padding:10px;text-align:center">1261</td><td style="padding:10px;text-align:center">1163</td><td style="padding:10px;text-align:center">1360</td><td style="padding:10px;text-align:center">63.3%</td><td style="padding:10px;text-align:center">60</td></tr>
|
| 47 |
-
<tr><td style="padding:10px">🥉 #3</td><td style="padding:10px">Gemini 2.5 Flash (Google)</td><td style="padding:10px;text-align:center">1188</td><td style="padding:10px;text-align:center">1103</td><td style="padding:10px;text-align:center">1273</td><td style="padding:10px;text-align:center">51.7%</td><td style="padding:10px;text-align:center">60</td></tr>
|
| 48 |
-
</tbody>
|
| 49 |
-
</table>
|
| 50 |
-
</div>
|
| 51 |
</div>
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: README
|
| 3 |
+
emoji: 👀
|
| 4 |
+
colorFrom: gray
|
| 5 |
+
colorTo: yellow
|
| 6 |
+
sdk: static
|
| 7 |
+
pinned: false
|
| 8 |
+
thumbnail: >-
|
| 9 |
+
https://cdn-uploads.huggingface.co/production/uploads/65b9463ac9aa94b1ca85414b/kCAsXut29_mLiA69uBzyh.jpeg
|
| 10 |
+
short_description: Reshaping businesses for the agentic era.
|
| 11 |
+
---
|
| 12 |
|
| 13 |
<div style="max-width:980px;margin:0 auto;padding:24px 16px;font:16px/1.55 system-ui,-apple-system,Segoe UI,Roboto;">
|
| 14 |
+
|
| 15 |
<header style="display:flex;gap:16px;align-items:center;margin-bottom:12px;">
|
| 16 |
+
<img src="assets/foaster-card.png" alt="Foaster.ai" width="42" height="42" style="border-radius:8px;object-fit:cover;">
|
| 17 |
<div>
|
| 18 |
<h1 style="margin:0;font-size:26px">Foaster.ai</h1>
|
| 19 |
<p style="margin:4px 0 0;color:#64748b">Reshaping businesses for the agentic era.</p>
|
|
|
|
| 23 |
<p><strong>Foaster.ai</strong> is a French start-up focused on the agentic era. At <em>Foaster Labs</em>, our Werewolf Benchmark studies how LLMs behave under social pressure—leadership, bluffing, and resistance to manipulation.</p>
|
| 24 |
|
| 25 |
<div style="display:flex;align-items:center;gap:10px;margin:14px 0 22px;">
|
| 26 |
+
<a href="https://huggingface.co/spaces/Foaster-ai/werewolf-leaderboard"
|
| 27 |
+
style="padding:10px 14px;border:1px solid #e5e7eb;border-radius:10px;text-decoration:none;">
|
| 28 |
+
🔗 Full leaderboard →
|
| 29 |
+
</a>
|
| 30 |
</div>
|
| 31 |
|
| 32 |
<h3 style="margin:0 0 8px;">Results — Podium (role-conditioned Elo)</h3>
|
| 33 |
<p style="margin:0 0 10px;color:#64748b">ELO-W = wolf (manipulation power) · ELO-V = villager (manipulation resistance)</p>
|
| 34 |
|
| 35 |
+
<ul style="margin:0 0 8px 18px;">
|
| 36 |
+
<li>🥇 <strong>GPT-5 (OpenAI)</strong> — ELO 1492 (W 1508 · V 1476), win rate 96.7%, 60 matches</li>
|
| 37 |
+
<li>🥈 <strong>Gemini 2.5 Pro (Google)</strong> — ELO 1261 (W 1163 · V 1360), win rate 63.3%, 60 matches</li>
|
| 38 |
+
<li>🥉 <strong>Gemini 2.5 Flash (Google)</strong> — ELO 1188 (W 1103 · V 1273), win rate 51.7%, 60 matches</li>
|
| 39 |
+
</ul>
|
| 40 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
</div>
|