alex17cmbs committed on
Commit
85e4975
·
verified ·
1 Parent(s): 86dddb3

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +23 -33
README.md CHANGED
@@ -1,18 +1,19 @@
1
- ---
2
- title: README
3
- emoji: 👀
4
- colorFrom: gray
5
- colorTo: yellow
6
- sdk: static
7
- pinned: false
8
- thumbnail: >-
9
- https://cdn-uploads.huggingface.co/production/uploads/65b9463ac9aa94b1ca85414b/kCAsXut29_mLiA69uBzyh.jpeg
10
- ---
11
-
12
 
13
  <div style="max-width:980px;margin:0 auto;padding:24px 16px;font:16px/1.55 system-ui,-apple-system,Segoe UI,Roboto;">
 
14
  <header style="display:flex;gap:16px;align-items:center;margin-bottom:12px;">
15
- <img src="./assets/logo.png" alt="Foaster.ai" width="42" height="42" style="border-radius:8px">
16
  <div>
17
  <h1 style="margin:0;font-size:26px">Foaster.ai</h1>
18
  <p style="margin:4px 0 0;color:#64748b">Reshaping businesses for the agentic era.</p>
@@ -22,30 +23,19 @@ thumbnail: >-
22
  <p><strong>Foaster.ai</strong> is a French start-up focused on the agentic era. At <em>Foaster Labs</em>, our Werewolf Benchmark studies how LLMs behave under social pressure—leadership, bluffing, and resistance to manipulation.</p>
23
 
24
  <div style="display:flex;align-items:center;gap:10px;margin:14px 0 22px;">
25
- <a href="https://huggingface.co/spaces/Foaster-ai/werewolf-leaderboard" style="padding:10px 14px;border:1px solid #e5e7eb;border-radius:10px;text-decoration:none;">🔗 Full leaderboard →</a>
 
 
 
26
  </div>
27
 
28
  <h3 style="margin:0 0 8px;">Results — Podium (role-conditioned Elo)</h3>
29
  <p style="margin:0 0 10px;color:#64748b">ELO-W = wolf (manipulation power) · ELO-V = villager (manipulation resistance)</p>
30
 
31
- <div style="border:1px solid #e5e7eb;border-radius:12px;overflow:hidden">
32
- <table style="width:100%;border-collapse:collapse;font-size:14px">
33
- <thead style="background:#f9fafb;color:#475569;text-transform:uppercase;font-size:12px;letter-spacing:.3px">
34
- <tr>
35
- <th style="padding:10px;text-align:left">Rank</th>
36
- <th style="padding:10px;text-align:left">Model</th>
37
- <th style="padding:10px;text-align:center">ELO</th>
38
- <th style="padding:10px;text-align:center">ELO-W</th>
39
- <th style="padding:10px;text-align:center">ELO-V</th>
40
- <th style="padding:10px;text-align:center">Win rate</th>
41
- <th style="padding:10px;text-align:center">Matches</th>
42
- </tr>
43
- </thead>
44
- <tbody>
45
- <tr><td style="padding:10px">🥇 #1</td><td style="padding:10px">GPT-5 (OpenAI)</td><td style="padding:10px;text-align:center">1492</td><td style="padding:10px;text-align:center">1508</td><td style="padding:10px;text-align:center">1476</td><td style="padding:10px;text-align:center">96.7%</td><td style="padding:10px;text-align:center">60</td></tr>
46
- <tr><td style="padding:10px">🥈 #2</td><td style="padding:10px">Gemini 2.5 Pro (Google)</td><td style="padding:10px;text-align:center">1261</td><td style="padding:10px;text-align:center">1163</td><td style="padding:10px;text-align:center">1360</td><td style="padding:10px;text-align:center">63.3%</td><td style="padding:10px;text-align:center">60</td></tr>
47
- <tr><td style="padding:10px">🥉 #3</td><td style="padding:10px">Gemini 2.5 Flash (Google)</td><td style="padding:10px;text-align:center">1188</td><td style="padding:10px;text-align:center">1103</td><td style="padding:10px;text-align:center">1273</td><td style="padding:10px;text-align:center">51.7%</td><td style="padding:10px;text-align:center">60</td></tr>
48
- </tbody>
49
- </table>
50
- </div>
51
  </div>
 
1
+ ---
2
+ title: README
3
+ emoji: 👀
4
+ colorFrom: gray
5
+ colorTo: yellow
6
+ sdk: static
7
+ pinned: false
8
+ thumbnail: >-
9
+ https://cdn-uploads.huggingface.co/production/uploads/65b9463ac9aa94b1ca85414b/kCAsXut29_mLiA69uBzyh.jpeg
10
+ short_description: Reshaping businesses for the agentic era.
11
+ ---
12
 
13
  <div style="max-width:980px;margin:0 auto;padding:24px 16px;font:16px/1.55 system-ui,-apple-system,Segoe UI,Roboto;">
14
+
15
  <header style="display:flex;gap:16px;align-items:center;margin-bottom:12px;">
16
+ <img src="assets/foaster-card.png" alt="Foaster.ai" width="42" height="42" style="border-radius:8px;object-fit:cover;">
17
  <div>
18
  <h1 style="margin:0;font-size:26px">Foaster.ai</h1>
19
  <p style="margin:4px 0 0;color:#64748b">Reshaping businesses for the agentic era.</p>
 
23
  <p><strong>Foaster.ai</strong> is a French start-up focused on the agentic era. At <em>Foaster Labs</em>, our Werewolf Benchmark studies how LLMs behave under social pressure—leadership, bluffing, and resistance to manipulation.</p>
24
 
25
  <div style="display:flex;align-items:center;gap:10px;margin:14px 0 22px;">
26
+ <a href="https://huggingface.co/spaces/Foaster-ai/werewolf-leaderboard"
27
+ style="padding:10px 14px;border:1px solid #e5e7eb;border-radius:10px;text-decoration:none;">
28
+ 🔗 Full leaderboard →
29
+ </a>
30
  </div>
31
 
32
  <h3 style="margin:0 0 8px;">Results — Podium (role-conditioned Elo)</h3>
33
  <p style="margin:0 0 10px;color:#64748b">ELO-W = wolf (manipulation power) · ELO-V = villager (manipulation resistance)</p>
34
 
35
+ <ul style="margin:0 0 8px 18px;">
36
+ <li>🥇 <strong>GPT-5 (OpenAI)</strong> — ELO 1492 (W 1508 · V 1476), win rate 96.7%, 60 matches</li>
37
+ <li>🥈 <strong>Gemini 2.5 Pro (Google)</strong> — ELO 1261 (W 1163 · V 1360), win rate 63.3%, 60 matches</li>
38
+ <li>🥉 <strong>Gemini 2.5 Flash (Google)</strong> — ELO 1188 (W 1103 · V 1273), win rate 51.7%, 60 matches</li>
39
+ </ul>
40
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  </div>