<!-- lainwired's picture | Initial jaxaht-benchmark deployment | 5146e76 -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Hanabi AHT Evaluation</title>
<script src="https://unpkg.com/react@18/umd/react.production.min.js" crossorigin></script>
<script src="https://unpkg.com/react-dom@18/umd/react-dom.production.min.js" crossorigin></script>
<script src="https://unpkg.com/@babel/standalone/babel.min.js"></script>
<style>
/* Design tokens (CSS custom properties) shared by the rules in this stylesheet. */
:root {
/* Page background; also reused as the fill of inputs/selects and stat boxes. */
--bg: #0f0f0f;
/* Background of cards and inactive tabs. */
--surface: #1a1a2e;
/* Background of the active tab. */
--surface2: #16213e;
/* Primary accent: title highlight, focus borders, primary button, upload-box hover. */
--accent: #e94560;
/* Secondary accent: secondary button and partner tag background. */
--accent2: #0f3460;
/* Default foreground text colour. */
--text: #eee;
/* Muted text: subtitle, labels, table headers, meta lines. */
--text2: #aaa;
/* Stat values, leaderboard scores, and the "file selected" upload border. */
--green: #2ecc71;
/* Leaderboard rank column. */
--yellow: #f1c40f;
/* Histogram bars. */
--blue: #3498db;
/* Corner radius used by cards. */
--radius: 10px;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
background: var(--bg);
color: var(--text);
min-height: 100vh;
}
.container { max-width: 960px; margin: 0 auto; padding: 32px 20px; }
h1 { font-size: 28px; margin-bottom: 4px; }
h1 span { color: var(--accent); }
.subtitle { color: var(--text2); margin-bottom: 32px; font-size: 14px; }
.card {
background: var(--surface);
border-radius: var(--radius);
padding: 24px;
margin-bottom: 20px;
}
.card h2 {
font-size: 16px;
text-transform: uppercase;
letter-spacing: 1px;
color: var(--text2);
margin-bottom: 16px;
}
.form-row {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 16px;
margin-bottom: 16px;
}
.form-group { display: flex; flex-direction: column; gap: 6px; }
label { font-size: 13px; color: var(--text2); font-weight: 500; }
select, input[type="text"], input[type="number"] {
background: var(--bg);
border: 1px solid #333;
color: var(--text);
padding: 10px 12px;
border-radius: 6px;
font-size: 14px;
outline: none;
transition: border-color 0.2s;
}
select:focus, input:focus { border-color: var(--accent); }
.btn {
background: var(--accent);
color: white;
border: none;
padding: 12px 28px;
border-radius: 6px;
font-size: 15px;
font-weight: 600;
cursor: pointer;
transition: opacity 0.2s, transform 0.1s;
width: 100%;
}
.btn:hover { opacity: 0.9; }
.btn:active { transform: scale(0.98); }
.btn:disabled { opacity: 0.4; cursor: not-allowed; }
.btn-secondary {
background: var(--accent2);
}
.results-grid {
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: 12px;
margin-bottom: 16px;
}
.stat-box {
background: var(--bg);
border-radius: 8px;
padding: 16px;
text-align: center;
}
.stat-box .value {
font-size: 28px;
font-weight: 700;
color: var(--green);
}
.stat-box .label {
font-size: 11px;
color: var(--text2);
text-transform: uppercase;
letter-spacing: 0.5px;
margin-top: 4px;
}
.histogram {
display: flex;
align-items: flex-end;
gap: 2px;
height: 80px;
padding: 0 4px;
}
.histogram .bar {
flex: 1;
background: var(--blue);
border-radius: 2px 2px 0 0;
min-height: 2px;
transition: height 0.3s;
position: relative;
}
.histogram .bar:hover { opacity: 0.8; }
table {
width: 100%;
border-collapse: collapse;
font-size: 14px;
}
th {
text-align: left;
padding: 10px 12px;
color: var(--text2);
font-size: 12px;
text-transform: uppercase;
letter-spacing: 0.5px;
border-bottom: 1px solid #333;
}
td {
padding: 10px 12px;
border-bottom: 1px solid #1a1a1a;
}
tr:hover td { background: rgba(255,255,255,0.02); }
.rank { color: var(--yellow); font-weight: 700; }
.score { color: var(--green); font-weight: 600; }
.partner-tag {
display: inline-block;
background: var(--accent2);
padding: 2px 8px;
border-radius: 4px;
font-size: 12px;
}
.spinner {
display: inline-block;
width: 18px;
height: 18px;
border: 2px solid rgba(255,255,255,0.3);
border-top-color: white;
border-radius: 50%;
animation: spin 0.6s linear infinite;
margin-right: 8px;
vertical-align: middle;
}
@keyframes spin { to { transform: rotate(360deg); } }
.meta { font-size: 12px; color: var(--text2); margin-top: 8px; }
.empty-state {
text-align: center;
padding: 40px;
color: var(--text2);
}
.tabs {
display: flex;
gap: 4px;
margin-bottom: 20px;
}
.tab {
padding: 8px 20px;
background: var(--surface);
border: none;
color: var(--text2);
border-radius: 6px 6px 0 0;
cursor: pointer;
font-size: 14px;
font-weight: 500;
}
.tab.active {
background: var(--surface2);
color: var(--text);
}
.file-upload {
border: 2px dashed #333;
border-radius: 8px;
padding: 20px;
text-align: center;
cursor: pointer;
transition: border-color 0.2s;
}
.file-upload:hover { border-color: var(--accent); }
.file-upload.has-file { border-color: var(--green); }
.or-divider {
text-align: center;
color: var(--text2);
font-size: 13px;
margin: 12px 0;
}
</style>
</head>
<body>
<div id="root"></div>
<script type="text/babel">
const { useState, useEffect, useCallback } = React;
function App() {
const [partners, setPartners] = useState({});
const [egos, setEgos] = useState({});
const [leaderboard, setLeaderboard] = useState([]);
const [tab, setTab] = useState('evaluate');
// Form state
const [agentName, setAgentName] = useState('');
const [partner, setPartner] = useState('bc_lstm');
const [egoKey, setEgoKey] = useState('iggi');
const [numEpisodes, setNumEpisodes] = useState(32);
const [useUpload, setUseUpload] = useState(false);
const [uploadFile, setUploadFile] = useState(null);
const [uploadActorType, setUploadActorType] = useState('mlp');
const [uploadHiddenDim, setUploadHiddenDim] = useState(512);
const [uploadActivation, setUploadActivation] = useState('relu');
// Eval state
const [loading, setLoading] = useState(false);
const [result, setResult] = useState(null);
const [error, setError] = useState(null);
useEffect(() => {
fetch('/api/partners').then(r => r.json()).then(setPartners);
fetch('/api/egos').then(r => r.json()).then(setEgos);
refreshLeaderboard();
}, []);
const refreshLeaderboard = () => {
fetch('/api/leaderboard').then(r => r.json()).then(d => setLeaderboard(d.entries || []));
};
const runEval = async () => {
setLoading(true);
setError(null);
setResult(null);
try {
let resp;
if (useUpload) {
if (!uploadFile) {
setError('Please select a checkpoint .zip file');
setLoading(false);
return;
}
const form = new FormData();
form.append('agent_name', agentName || uploadFile.name.replace(/\.zip$/, ''));
form.append('partner', partner);
form.append('num_episodes', numEpisodes);
form.append('actor_type', uploadActorType);
form.append('hidden_dim', uploadHiddenDim);
form.append('activation', uploadActivation);
form.append('checkpoint', uploadFile);
resp = await fetch('/api/evaluate_upload', { method: 'POST', body: form });
} else {
const egoInfo = egos[egoKey] || {};
resp = await fetch('/api/evaluate', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
agent_name: agentName || (egoInfo.name || egoKey) + ' vs ' + (partners[partner]?.name || partner),
partner,
ego: egoKey,
num_episodes: numEpisodes,
}),
});
}
const data = await resp.json();
if (resp.ok) {
setResult(data);
refreshLeaderboard();
} else {
setError(data.error + (data.hint ? ` (${data.hint})` : ''));
}
} catch (e) {
setError(e.message);
} finally {
setLoading(false);
}
};
return (
<div className="container">
<h1>Hanabi <span>AHT</span> Evaluation</h1>
<p className="subtitle">
Submit policies and evaluate against held-out partners. Extensible to AH2AC2 formal submissions.
</p>
<div className="tabs">
<button className={`tab ${tab === 'evaluate' ? 'active' : ''}`} onClick={() => setTab('evaluate')}>
Evaluate
</button>
<button className={`tab ${tab === 'leaderboard' ? 'active' : ''}`} onClick={() => setTab('leaderboard')}>
Leaderboard ({leaderboard.length})
</button>
</div>
{tab === 'evaluate' && (
<>
<div className="card">
<h2>Configuration</h2>
<div style={{marginBottom: 16, display: 'flex', gap: 8}}>
<button
className={`tab ${!useUpload ? 'active' : ''}`}
style={{borderRadius: 6}}
onClick={() => setUseUpload(false)}>
Built-in ego
</button>
<button
className={`tab ${useUpload ? 'active' : ''}`}
style={{borderRadius: 6}}
onClick={() => setUseUpload(true)}>
Upload checkpoint
</button>
</div>
{!useUpload && (
<div className="form-row">
<div className="form-group">
<label>Ego Agent (the agent being tested)</label>
<select value={egoKey} onChange={e => setEgoKey(e.target.value)}>
{Object.entries(egos).map(([k, v]) => (
<option key={k} value={k}>{v.name}{v.trained ? ' *' : ''}</option>
))}
</select>
{egos[egoKey]?.trained && (
<span style={{fontSize: 11, color: '#2ecc71', marginTop: 4}}>* Trained RL checkpoint</span>
)}
</div>
<div className="form-group">
<label>Held-out Partner (teammate)</label>
<select value={partner} onChange={e => setPartner(e.target.value)}>
{Object.entries(partners).map(([k, v]) => (
<option key={k} value={k}>{v.name}</option>
))}
</select>
</div>
</div>
)}
{useUpload && (
<>
<div
className={`file-upload ${uploadFile ? 'has-file' : ''}`}
onClick={() => document.getElementById('file-input').click()}
style={{marginBottom: 12}}>
<input id="file-input" type="file" accept=".zip"
style={{display: 'none'}}
onChange={e => setUploadFile(e.target.files[0])} />
{uploadFile
? `Selected: ${uploadFile.name} (${(uploadFile.size/1024/1024).toFixed(1)} MB)`
: 'Click to upload orbax checkpoint as .zip (must contain saved_train_run/ directory)'}
</div>
<div className="form-row">
<div className="form-group">
<label>Actor type</label>
<select value={uploadActorType} onChange={e => setUploadActorType(e.target.value)}>
<option value="mlp">MLP</option>
<option value="s5">S5</option>
<option value="rnn">RNN</option>
</select>
</div>
<div className="form-group">
<label>Hidden dim</label>
<input type="number" value={uploadHiddenDim}
onChange={e => setUploadHiddenDim(parseInt(e.target.value) || 512)} />
</div>
</div>
<div className="form-row">
<div className="form-group">
<label>Activation</label>
<select value={uploadActivation} onChange={e => setUploadActivation(e.target.value)}>
<option value="relu">ReLU</option>
<option value="tanh">Tanh</option>
</select>
</div>
<div className="form-group">
<label>Held-out Partner (teammate)</label>
<select value={partner} onChange={e => setPartner(e.target.value)}>
{Object.entries(partners).map(([k, v]) => (
<option key={k} value={k}>{v.name}</option>
))}
</select>
</div>
</div>
<p style={{fontSize: 11, color: '#666', marginBottom: 12}}>
Architecture params are overridden by config.yaml if included in the zip.
</p>
</>
)}
<div className="form-row">
<div className="form-group">
<label>Label (for leaderboard)</label>
<input type="text" placeholder="auto-generated if empty"
value={agentName} onChange={e => setAgentName(e.target.value)} />
</div>
<div className="form-group">
<label>Episodes</label>
<input type="number" min="1" max="1000" value={numEpisodes}
onChange={e => setNumEpisodes(parseInt(e.target.value) || 32)} />
</div>
</div>
{!useUpload && egos[egoKey] && (
<p style={{fontSize: 12, color: '#666', marginTop: -8, marginBottom: 12}}>
{egos[egoKey].description}
</p>
)}
<div style={{marginTop: 8}}>
<button className="btn" onClick={runEval} disabled={loading}>
{loading && <span className="spinner" />}
{loading ? 'Evaluating...' : 'Run Evaluation'}
</button>
</div>
{error && <p style={{color: '#e94560', marginTop: 12}}>{error}</p>}
</div>
{result && <ResultCard result={result} />}
</>
)}
{tab === 'leaderboard' && <Leaderboard entries={leaderboard} onRefresh={refreshLeaderboard} />}
</div>
);
}
function ResultCard({ result }) {
const scores = result.per_episode_scores || [];
const maxScore = Math.max(...scores, 1);
// Build histogram buckets (0..25)
const buckets = new Array(26).fill(0);
scores.forEach(s => {
const idx = Math.max(0, Math.min(25, Math.round(s)));
buckets[idx]++;
});
const maxBucket = Math.max(...buckets, 1);
return (
<div className="card">
<h2>Results</h2>
<div className="results-grid">
<div className="stat-box">
<div className="value">{result.mean_score.toFixed(1)}</div>
<div className="label">Mean</div>
</div>
<div className="stat-box">
<div className="value">{result.median_score.toFixed(1)}</div>
<div className="label">Median</div>
</div>
<div className="stat-box">
<div className="value" style={{color: '#3498db'}}>
{result.min_score.toFixed(0)}-{result.max_score.toFixed(0)}
</div>
<div className="label">Range</div>
</div>
<div className="stat-box">
<div className="value" style={{color: '#f1c40f'}}>
#{result.leaderboard_position}
</div>
<div className="label">Rank</div>
</div>
</div>
<label>Score Distribution ({result.num_episodes} episodes)</label>
<div className="histogram" style={{marginTop: 8}}>
{buckets.map((count, i) => (
<div key={i} className="bar"
style={{height: `${(count / maxBucket) * 100}%`}}
title={`Score ${i}: ${count} episodes`} />
))}
</div>
<div style={{display: 'flex', justifyContent: 'space-between', marginTop: 4}}>
<span style={{fontSize: 11, color: '#666'}}>0</span>
<span style={{fontSize: 11, color: '#666'}}>25</span>
</div>
<div className="meta">
Evaluated in {result.eval_time_seconds}s | Partner: {result.partner_name} | {result.timestamp}
</div>
</div>
);
}
function Leaderboard({ entries, onRefresh }) {
if (entries.length === 0) {
return (
<div className="card">
<div className="empty-state">
<p>No submissions yet. Run an evaluation to get on the board.</p>
</div>
</div>
);
}
return (
<div className="card">
<h2>Leaderboard</h2>
<table>
<thead>
<tr>
<th>#</th>
<th>Agent</th>
<th>Partner</th>
<th>Mean</th>
<th>Median</th>
<th>Std</th>
<th>Episodes</th>
<th>Time</th>
</tr>
</thead>
<tbody>
{entries.map((e, i) => (
<tr key={e.id}>
<td className="rank">{i + 1}</td>
<td>{e.agent_name}</td>
<td><span className="partner-tag">{e.partner_name}</span></td>
<td className="score">{e.mean_score.toFixed(1)}/25</td>
<td>{e.median_score.toFixed(1)}</td>
<td>{e.std.toFixed(1)}</td>
<td>{e.num_episodes}</td>
<td style={{color: '#666', fontSize: 12}}>{e.timestamp}</td>
</tr>
))}
</tbody>
</table>
<div style={{marginTop: 12, display: 'flex', justifyContent: 'space-between', alignItems: 'center'}}>
<button className="btn btn-secondary" onClick={onRefresh} style={{width: 'auto', padding: '8px 20px'}}>
Refresh
</button>
<span style={{fontSize: 12, color: '#666'}}>
Saved to: evaluation/leaderboard.json
</span>
</div>
</div>
);
}
ReactDOM.createRoot(document.getElementById('root')).render(<App />);
</script>
</body>
</html>