import React, { useState, useEffect, useRef, useMemo } from 'react'
// Prefix for backend request URLs; callers build paths as `${API}/...`.
const API = ''

// Difficulty tiers in display order.
const DIFF_ORDER = ['easy', 'medium', 'hard', 'expert']

// Foreground colour per difficulty tier.
const DIFF_COLOR = {
  easy: '#22c55e',
  medium: '#6366f1',
  hard: '#f59e0b',
  expert: '#ef4444',
}

// Background tint per difficulty tier.
const DIFF_BG = {
  easy: '#f0fdf4',
  medium: '#eef2ff',
  hard: '#fffbeb',
  expert: '#fef2f2',
}

// Score-component palette, keys and labels; the three arrays are index-aligned.
const COMP_COLORS = [
  '#6366f1',
  '#0ea5e9',
  '#22c55e',
  '#f59e0b',
  '#f43f5e',
]
const COMP_KEYS = [
  'weighted_completion',
  'deadline_adherence',
  'energy_efficiency',
  'dependency_bonus',
  'interruption_bonus',
]
const COMP_LABELS = [
  'Weighted Completion ×0.60',
  'Deadline Adherence ×0.22',
  'Energy Efficiency ×0.10',
  'Dependency Bonus ×0.05',
  'Interruption Bonus ×0.03',
]

// Reference scores per difficulty tier (baseline and LLM target).
const BASELINE = {
  easy: 0.856,
  medium: 0.523,
  hard: 0.301,
  expert: 0.221,
}
const LLM_TARGET = {
  easy: 0.88,
  medium: 0.58,
  hard: 0.37,
  expert: 0.27,
}
// ── Helpers ────────────────────────────────────────────────────────────────────
/**
 * Trailing moving average over a numeric series.
 *
 * Element i of the result is the mean of the values in the window ending at
 * index i and reaching back at most `w` entries; early windows are shorter
 * because they are clipped at the start of the array.
 *
 * @param {number[]} arr - input series
 * @param {number} [w=10] - window length
 * @returns {number[]} averaged series, same length as `arr`
 */
function trailingAvg(arr, w = 10) {
  const windowMean = (_, end) => {
    const start = Math.max(0, end - w + 1)
    const windowVals = arr.slice(start, end + 1)
    let total = 0
    for (const val of windowVals) {
      total += val
    }
    return total / windowVals.length
  }
  return arr.map(windowMean)
}
/**
 * Arithmetic mean of a numeric array.
 *
 * @param {number[]} arr - values to average
 * @returns {number} the mean, or 0 when the array is empty
 */
function arrAvg(arr) {
  if (!arr.length) return 0
  let total = 0
  for (const val of arr) {
    total += val
  }
  return total / arr.length
}
// ── GRPO Training Chart (matches the reference image) ─────────────────────────
/**
 * GRPO training reward chart: a reward-over-steps plot followed by a
 * three-bar "phase average" chart and a row of summary stats (per the section
 * comments in the markup below).
 *
 * @param {object} props
 * @param {Array<{mean:number, max?:number, min?:number}>} props.curve
 *   One entry per plotted step. `max`/`min` fall back to `mean` in the
 *   memoized series.
 * @returns The chart markup, or null when `curve` is missing or has fewer
 *   than 10 entries.
 *
 * NOTE(review): the summary stats always prefix values with '+', so a negative
 * value would render as "+-0.1234" — confirm values are never negative.
 */
function GRPOTrainingChart({ curve }) {
// Derive the per-step series and summary numbers; recomputed only when `curve` changes.
const data = useMemo(() => {
// Too little data to chart; the component returns null below.
if (!curve || curve.length < 10) return null
const n = curve.length
const means = curve.map(d => d.mean)
// Entries lacking max/min use their mean instead.
const maxes = curve.map(d => d.max ?? d.mean)
const mins = curve.map(d => d.min ?? d.mean)
// Trailing 10-entry average of the means.
const sm = trailingAvg(means, 10)
// Y range: the lower bound is never above -0.015; the upper bound pads the largest max.
const yLo = Math.min(-0.01, ...mins) - 0.005
const yHi = Math.max(...maxes) + 0.01
// Phase averages: split the entries into thirds by index
const t1 = Math.floor(n / 3), t2 = Math.floor((2 * n) / 3)
const early = arrAvg(means.slice(0, t1))
const middle = arrAvg(means.slice(t1, t2))
const late = arrAvg(means.slice(t2))
return { n, means, maxes, mins, sm, yLo, yHi,
startMean: means[0], endMean: means[n - 1],
peakMean: Math.max(...means),
early, middle, late }
}, [curve])
if (!data) return null
const { n, means, maxes, mins, sm,
yLo, yHi, startMean, endMean, peakMean,
early, middle, late } = data
// SVG layout
const W = 880, PAD = { t: 24, r: 20, b: 38, l: 52 }
const cW = W - PAD.l - PAD.r
const H1 = 220
const cH = H1 - PAD.t - PAD.b
// Index -> horizontal pixel; Math.max(…, 1) avoids dividing by zero.
const x = i => PAD.l + (i / Math.max(n - 1, 1)) * cW
// Value -> vertical pixel; larger values map to smaller y (higher on screen).
const y = v => PAD.t + cH - ((v - yLo) / (yHi - yLo)) * cH
// Coordinates are emitted with one decimal place.
const fp = v => v.toFixed(1)
// Polyline points. These strings are rebuilt on every render; only `data` above is memoized.
// NOTE(review): bandPts reads d.min / d.max directly, without the `?? d.mean`
// fallback used for `mins`/`maxes`, so an entry lacking them yields "NaN" coordinates.
const bandPts = curve.map((d, i) => `${fp(x(i))},${fp(y(d.min))}`).join(' ')
+ ' '
+ [...curve].reverse().map((d, i) => `${fp(x(n - 1 - i))},${fp(y(d.max))}`).join(' ')
const rawPts = means.map((v, i) => `${fp(x(i))},${fp(y(v))}`).join(' ')
const smPts = sm.map((v, i) => `${fp(x(i))},${fp(y(v))}`).join(' ')
const y0 = y(0)
const yStart = y(startMean)
// Y-axis ticks: every 0.05 starting at -0.10, keeping those at or above yLo.
// The step is rounded to 2 decimals so floating-point drift does not accumulate.
const yTicks = []
for (let v = -0.10; v <= yHi + 0.01; v = Math.round((v + 0.05) * 100) / 100) {
if (v >= yLo) yTicks.push(v)
}
// X-axis ticks: every 200 entries when n > 500, otherwise every 100;
// the final index is always included.
const xStep = n > 500 ? 200 : 100
const xTicks = []
for (let i = 0; i < n; i += xStep) xTicks.push(i)
if (xTicks[xTicks.length - 1] !== n - 1) xTicks.push(n - 1)
const phases = [
{ label: 'Early', sub: '(first 1/3)', val: early, fill: '#fca5a5' },
{ label: 'Middle', sub: '(second 1/3)', val: middle, fill: '#fcd34d' },
{ label: 'Late', sub: '(final 1/3)', val: late, fill: '#86efac' },
]
// Bar heights below are scaled relative to the largest phase average.
// NOTE(review): if no phase average is positive, the division by maxPhase
// produces NaN, infinite, or over-tall bar heights — confirm inputs.
const maxPhase = Math.max(early, middle, late)
// Phase bar SVG dimensions
const BW = W, BH = 130
const BPAD = { t: 10, b: 44, l: PAD.l, r: PAD.r }
const bcW = BW - BPAD.l - BPAD.r
const bcH = BH - BPAD.t - BPAD.b
// Each phase gets an equal-width slot; the bar fills 52% of its slot.
const barSlot = bcW / phases.length
const barW = barSlot * 0.52
return (
{/* Chart title */}
⚡ StressTest — GRPO Training Reward Curve
Cognitive Load Manager · Meta OpenEnv Hackathon
Episode Reward Over Training (mean ± range per step)
{/* ── Top chart SVG ── */}
{/* Y-axis grid + labels */}
{yTicks.map(v => (
{v >= 0 ? `+${v.toFixed(2)}` : v.toFixed(2)}
))}
{/* min/max band */}
`${fp(x(i))},${fp(y(v))}`).join(' ')}
fill="none" stroke="#6366f130" strokeWidth="0.7"/>
`${fp(x(i))},${fp(y(v))}`).join(' ')}
fill="none" stroke="#6366f130" strokeWidth="0.7"/>
{/* Zero baseline (red dashed) */}
{y0 >= PAD.t && y0 <= PAD.t + cH && (
)}
{/* Start reward baseline (gray dotted) */}
{/* Raw mean (thin) */}
{/* Smoothed mean (thick) */}
{/* X-axis ticks */}
{xTicks.map(i => (
{i}
))}
{/* X-axis label */}
Training Step
{/* Start annotation */}
Start: {startMean >= 0 ? '+' : ''}{startMean.toFixed(4)}
{/* End annotation */}
End: {endMean >= 0 ? '+' : ''}{endMean.toFixed(4)}
{/* Chart border */}
{/* Legend */}
{[
{ color:'#6366f112', stroke:'#6366f130', label:'min/max range', band:true },
{ color:'#818cf8', label:'raw mean', opacity:0.5 },
{ color:'#1d4ed8', label:'smoothed (window=10)', bold:true },
{ color:'#ef4444', label:'zero baseline', dash:true },
{ color:'#94a3b8', label:'start reward', dash:true },
].map(l => (
{l.band ? (
) : (
)}
{l.label}
))}
{/* Phase bar subtitle */}
Average Reward by Training Phase (Early → Late shows improvement)
{/* ── Phase bar chart SVG ── */}
{phases.map((p, i) => {
const bH = (p.val / maxPhase) * bcH
const bX = BPAD.l + i * barSlot + (barSlot - barW) / 2
const bY = BPAD.t + bcH - bH
return (
{/* Bar */}
{/* Value label above bar */}
+{p.val.toFixed(4)}
{/* Phase label */}
{p.label}
{p.sub}
)
})}
{/* Y=0 baseline */}
{/* Summary stats row */}
{[
{ l:'Total Steps', v: n.toLocaleString(), c:'#6366f1' },
{ l:'Start', v: `+${startMean.toFixed(4)}`, c:'#6b7280' },
{ l:'End', v: `+${endMean.toFixed(4)}`, c:'#1d4ed8' },
{ l:'Total Gain', v: `+${(endMean-startMean).toFixed(4)}`, c:'#16a34a' },
{ l:'Peak Mean', v: `+${peakMean.toFixed(4)}`, c:'#22c55e' },
].map(s => (
))}
)
}
// ── Before vs After Training — single overlay chart ───────────────────────────
/**
 * Overlay chart comparing the first N and last N entries of a reward curve,
 * where N = min(120, floor(curve.length / 5)).
 *
 * @param {object} props
 * @param {Array<{mean:number}>} props.curve - reward curve; only `mean` is read here.
 * @returns The comparison chart markup.
 *
 * NOTE(review): there is no guard for a missing or very short `curve`. With
 * fewer than 5 entries N is 0, both windows are empty, and yHi becomes
 * -Infinity. Presumably callers only render this for long curves — confirm.
 * NOTE(review): the stat chips always prefix '+', so a negative average or
 * gain would render as "+-0.1234".
 */
function BeforeAfterComparisonChart({ curve }) {
// Window size: a fifth of the curve, capped at 120 entries.
const N = Math.min(120, Math.floor(curve.length / 5))
const beforeSlice = curve.slice(0, N)
const afterSlice = curve.slice(curve.length - N)
const bMeans = beforeSlice.map(d => d.mean)
const aMeans = afterSlice.map(d => d.mean)
// Both windows are smoothed with an 8-entry trailing average.
const smB = trailingAvg(bMeans, 8)
const smA = trailingAvg(aMeans, 8)
// Shared Y range across both windows so the two lines are directly comparable.
const allVals = [...bMeans, ...aMeans]
const yLo = Math.min(-0.01, ...allVals) - 0.005
const yHi = Math.max(...allVals) + 0.015
const W = 880, PAD = { t:28, r:20, b:40, l:54 }, H = 200
const cW = W - PAD.l - PAD.r
const cH = H - PAD.t - PAD.b
// Index within the window -> horizontal pixel; Math.max(…, 1) avoids dividing by zero.
const x = i => PAD.l + (i / Math.max(N - 1, 1)) * cW
// Value -> vertical pixel; larger values map to smaller y.
const y = v => PAD.t + cH - ((v - yLo) / (yHi - yLo)) * cH
const fp = v => v.toFixed(1)
const bSmPts = smB.map((v, i) => `${fp(x(i))},${fp(y(v))}`).join(' ')
const aSmPts = smA.map((v, i) => `${fp(x(i))},${fp(y(v))}`).join(' ')
const bRawPts = bMeans.map((v, i) => `${fp(x(i))},${fp(y(v))}`).join(' ')
const aRawPts = aMeans.map((v, i) => `${fp(x(i))},${fp(y(v))}`).join(' ')
// Filled area under each smoothed line: the smoothed points, closed off along
// the bottom of the plot (y at yLo) at the first and last x positions.
const bFill = `${fp(x(0))},${fp(y(yLo))} ${bSmPts} ${fp(x(N-1))},${fp(y(yLo))}`
const aFill = `${fp(x(0))},${fp(y(yLo))} ${aSmPts} ${fp(x(N-1))},${fp(y(yLo))}`
const avgB = arrAvg(bMeans), avgA = arrAvg(aMeans)
const gain = avgA - avgB
// Relative gain in percent; shown as '∞' when the "before" average is exactly 0.
const gainPct = avgB !== 0 ? ((gain / Math.abs(avgB)) * 100).toFixed(1) : '∞'
// Y-axis ticks: every 0.05 starting at -0.05, keeping those at or above yLo.
const yTicks = []
for (let v = -0.05; v <= yHi + 0.01; v = Math.round((v + 0.05) * 100) / 100) {
if (v >= yLo) yTicks.push(v)
}
// X-axis labels at the start, quarter points and end of the window.
const xLabels = [0, Math.floor(N/4), Math.floor(N/2), Math.floor(3*N/4), N-1]
return (
{/* Header row */}
📊 Before vs After Training — Reward Comparison
First {N} steps (pre-convergence) vs Last {N} steps (post-GRPO) — both smoothed with window=8
{/* Stat chips */}
{[
{ label:'Before avg', val:`+${avgB.toFixed(4)}`,
bg:'#fff1f2', fg:'#e11d48', border:'#fecdd3' },
{ label:'After avg', val:`+${avgA.toFixed(4)}`,
bg:'#f0fdf4', fg:'#15803d', border:'#bbf7d0' },
{ label:'Gain', val:`+${gain.toFixed(4)}`,
bg:'#eff6ff', fg:'#1d4ed8', border:'#bfdbfe' },
{ label:'Improvement', val:`${gainPct}%`,
bg:'#fdf4ff', fg:'#7e22ce', border:'#e9d5ff' },
].map(s => (
))}
{/* Chart SVG */}
{/* Grid + Y ticks */}
{yTicks.map(v => (
{v >= 0 ? `+${v.toFixed(2)}` : v.toFixed(2)}
))}
{/* Zero line */}
{/* Filled areas */}
{/* Raw lines (thin, behind) */}
{/* Smoothed lines (prominent) */}
{/* Average reference lines */}
{/* Avg labels on right */}
avg
avg
{/* Start/end dots */}
{/* X-axis */}
{xLabels.map(i => (
{i}
))}
Steps (relative within window)
{/* Chart border */}
{/* Legend */}
Before Training (steps 0–{N})
After Training (steps {curve.length - N}–{curve.length})
Phase average
)
}
// ── Scoring Formula Card (standalone, visually rich) ─────────────────────────
// The five weighted terms of the reward score, in display order.
// Each entry carries a short key, a display label, its weight, an accent
// colour and a one-line description.
const FORMULA_ITEMS = [
  {
    key: 'completion',
    label: 'Task Completion',
    weight: 0.60,
    color: '#6366f1',
    desc: 'Fraction of tasks fully completed, weighted by priority',
  },
  {
    key: 'deadline',
    label: 'Deadline Adherence',
    weight: 0.22,
    color: '#0ea5e9',
    desc: 'Bonus for finishing before deadline; penalty for missing it',
  },
  {
    key: 'energy',
    label: 'Energy Efficiency',
    weight: 0.10,
    color: '#22c55e',
    desc: 'Penalises high worker fatigue and stress spikes',
  },
  {
    key: 'dependency',
    label: 'Dependency Bonus',
    weight: 0.05,
    color: '#f59e0b',
    desc: 'Reward for respecting task dependency order',
  },
  {
    key: 'interrupt',
    label: 'Interruption Bonus',
    weight: 0.03,
    color: '#f43f5e',
    desc: 'Reward for minimising context-switching interruptions',
  },
]
/**
 * Static card describing the reward scoring formula, driven entirely by
 * FORMULA_ITEMS. Takes no props.
 *
 * From the visible expressions it contains:
 *  - a stacked weight bar, where a percentage is printed only for items with
 *    weight >= 0.10,
 *  - labels under the bar, where only items with weight >= 0.08 get text
 *    (the first word of the item's label),
 *  - one card per item showing its weight, label and description,
 *  - the formula written out as short-name × weight terms joined by '+'.
 */
function ScoringFormulaCard() {
return (
{/* Title */}
🏆 Reward Scoring Formula
Each action is scored on 5 dimensions. Weights reflect cognitive-load research priorities.
{/* Stacked weight bar */}
Weight distribution
{FORMULA_ITEMS.map(it => (
{it.weight >= 0.10 && (
{(it.weight * 100).toFixed(0)}%
)}
))}
{/* Labels under bar */}
{FORMULA_ITEMS.map(it => (
{it.weight >= 0.08 ? it.label.split(' ')[0] : ''}
))}
{/* Component cards */}
{FORMULA_ITEMS.map(it => (
{/* Coloured top accent bar */}
{/* Weight badge */}
×{it.weight.toFixed(2)}
{it.label}
{it.desc}
))}
{/* Formula expression */}
Formula
score =
{FORMULA_ITEMS.map((it, idx) => (
{it.key === 'completion' ? 'completion' :
it.key === 'deadline' ? 'deadline' :
it.key === 'energy' ? 'energy' :
it.key === 'dependency' ? 'dep' : 'interrupt'}×{it.weight}
{idx < FORMULA_ITEMS.length - 1 &&
+ }
))}
∈ (0.01, 0.99)
)
}
// ── Tiny SVG charts (used by benchmark section) ────────────────────────────────
/**
 * Minimal line chart for a numeric series.
 *
 * @param {object} props
 * @param {number[]} props.data - values to plot, one point every 18px.
 * @param {string} [props.color='#6366f1'] - base colour; '18' is appended to
 *   it for the area fill.
 * @param {number} [props.height=120] - drawing height used by the y mapping.
 * @returns The chart markup, or a "No data yet" placeholder when `data` is
 *   missing or empty.
 */
function LineChart({ data, color = '#6366f1', height = 120 }) {
if (!data || !data.length) return (
No data yet
)
// 18px per point, with a minimum width of 300.
const W = Math.max(data.length * 18, 300)
const lo = Math.min(...data), hi = Math.max(...data)
// A flat series gets span 1 so the y mapping never divides by zero.
const sp = hi === lo ? 1 : hi - lo
// Value -> vertical pixel; the largest value maps to the smallest y.
const py = v => (height - 14) - ((v - lo) / sp) * (height - 26) + 7
const pts = data.map((v, i) => `${i * 18 + 9},${py(v)}`).join(' ')
return (
`${i * 18 + 9},${py(v)}`),
`${(data.length - 1) * 18 + 9},${height}`].join(' ')}
fill={color + '18'} stroke="none"/>
)
}
// Small band chart (used by demo training live view)
/**
 * Mean line with a min/max band for a reward curve.
 *
 * @param {object} props
 * @param {Array<{mean:number, max?:number, min?:number}>} props.curve
 *   Curve entries; `max`/`min` fall back to `mean` when absent.
 * @param {number} [props.height=140] - drawing height used by the y mapping.
 * @returns The chart markup, or a placeholder message when `curve` is missing
 *   or empty.
 */
function BandChart({ curve, height = 140 }) {
if (!curve || !curve.length) return (
Training data will appear here
)
const means = curve.map(d => d.mean)
const maxes = curve.map(d => d.max ?? d.mean)
const mins = curve.map(d => d.min ?? d.mean)
// 18px per entry, with a minimum width of 300.
const W = Math.max(curve.length * 18, 300)
// The vertical range spans the lowest min to the highest max.
const lo = Math.min(...mins), hi = Math.max(...maxes)
// A flat range gets span 1 so the y mapping never divides by zero.
const sp = hi === lo ? 1 : hi - lo
const py = v => (height - 14) - ((v - lo) / sp) * (height - 26) + 7
// Band outline: mins left-to-right, then maxes right-to-left, forming a closed shape.
const bandPts = [
...mins.map((v, i) => `${i * 18 + 9},${py(v)}`),
...[...maxes].reverse().map((v, i) =>
`${(curve.length - 1 - i) * 18 + 9},${py(v)}`),
].join(' ')
const meanPts = means.map((v, i) => `${i * 18 + 9},${py(v)}`).join(' ')
return (
`${i * 18 + 9},${py(v)}`).join(' ')}
fill="none" stroke="#6366f140" strokeWidth="1"/>
`${i * 18 + 9},${py(v)}`).join(' ')}
fill="none" stroke="#6366f140" strokeWidth="1"/>
)
}
// ── Before/After grouped bar ───────────────────────────────────────────────────
/**
 * Per-difficulty before/after score comparison, iterating DIFF_ORDER.
 *
 * @param {object} props
 * @param {object|null} props.before - scores read as before[difficulty]; may be null.
 * @param {object|null} props.after - scores read as after[difficulty]; may be null.
 * @returns The comparison markup, or a prompt to run demo training when both
 *   `before` and `after` are falsy.
 *
 * For each difficulty, scores are converted to whole-number percentages capped
 * at 100% ('0%' when the score is missing), and LLM_TARGET supplies a third
 * percentage. The signed delta is shown only when both scores are present.
 */
function BeforeAfterBars({ before, after }) {
if (!before && !after) return (
Run demo training to see before/after comparison
)
return (
{DIFF_ORDER.map(d => {
const bv = before?.[d] ?? null
const av = after?.[d] ?? null
const bPct = bv != null ? `${Math.min(100, bv * 100).toFixed(0)}%` : '0%'
const aPct = av != null ? `${Math.min(100, av * 100).toFixed(0)}%` : '0%'
const tPct = `${Math.min(100, LLM_TARGET[d] * 100).toFixed(0)}%`
return (
{d}
{av != null && bv != null && (
bv ? '#16a34a' : '#ef4444' }}>
{av > bv ? '▲' : '▼'}
{av > bv ? '+' : ''}{(av - bv).toFixed(4)} vs before
)}
)
})}
)
}
/**
 * Single bar row.
 *
 * Props: label, pct, color, val, dashed, glow.
 * NOTE(review): the returned markup is empty in this copy of the file, so how
 * each prop is used cannot be confirmed here — check the original JSX.
 */
function BarRow({ label, pct, color, val, dashed, glow }) {
return (
)
}
/**
 * Section heading.
 *
 * Props: children, action.
 * NOTE(review): the returned markup is empty in this copy of the file, so how
 * `children` and `action` are laid out cannot be confirmed here — check the
 * original JSX.
 */
function SectionHeader({ children, action }) {
return (
)
}
// ── Demo training progress section ────────────────────────────────────────────
/**
 * Demo-training panel: start buttons, status line, progress figures and the
 * live curve / before-after sections.
 *
 * @param {object} props
 * @param {object} props.state - training state; the fields read are running,
 *   status, current_step, total_steps, curve, before, after, metadata, error.
 * @param {(difficulty: string) => void} props.onStart - called with 'medium',
 *   'hard' or 'expert' when the matching button is activated.
 * @returns The panel markup.
 *
 * NOTE(review): the "Last Max" stat calls lastEntry.max.toFixed with no
 * fallback, so a curve entry without `max` would throw — confirm entries
 * always carry `max`.
 */
function DemoTrainingProgress({ state, onStart }) {
const { running, status, current_step, total_steps, curve,
before, after, metadata, error } = state
// Whole-number completion percentage; 0 when total_steps is not positive.
const pct = total_steps > 0
? Math.round((current_step / total_steps) * 100) : 0
// Most recent curve entry, or null when the curve is missing or empty.
const lastEntry = curve && curve.length ? curve[curve.length - 1] : null
// Mean values only; a missing curve is treated as empty.
const meanTrace = (curve || []).map(d => d.mean)
return (
🧪 Demo Training — Random → Heuristic Agent
Simulates GRPO reward progression on the HF Space (no GPU required).
Runs {total_steps} steps. Results saved to{' '}
reward_curve.json.
{['medium','hard','expert'].map(d => (
onStart(d)} disabled={running}
style={{ padding:'8px 14px', borderRadius:8, border:'none',
background: running ? '#e2e8f0' : DIFF_BG[d],
color: running ? '#94a3b8' : DIFF_COLOR[d],
fontWeight:700, fontSize:12, cursor: running ? 'not-allowed':'pointer',
textTransform:'capitalize' }}>
{running ? '⏳' : '▶'} {d}
))}
{status !== 'idle' && (
{status==='running' && `⚡ Training… step ${current_step}/${total_steps}`}
{status==='completed' && `✅ Training complete — ${total_steps} steps`}
{status==='error' && `❌ ${error}`}
{pct}%
)}
{(running || status==='completed') && (
{[
{ l:'Step', v:`${current_step}/${total_steps}`, c:'#6366f1' },
{ l:'Last Mean', v: lastEntry ? lastEntry.mean.toFixed(4) : '—',
c: lastEntry && lastEntry.mean >= 0 ? '#16a34a':'#ef4444' },
{ l:'Last Max', v: lastEntry ? lastEntry.max.toFixed(4) : '—', c:'#22c55e' },
{ l:'Difficulty', v: metadata?.difficulty ?? '—', c:'#0ea5e9' },
].map(s => (
))}
)}
{meanTrace.length > 0 && (
Live Reward Curve (mean ± band)
)}
{/* Before/After when demo completes */}
{status === 'completed' && (before || after) && (
Before vs After — Score Comparison
)}
{status === 'idle' && (
Click ▶ medium / ▶ hard / ▶ expert to start demo training.
)}
)
}
// ── Benchmark section ──────────────────────────────────────────────────────────
/**
 * Benchmark tab: runs the backend benchmark on demand and shows per-difficulty
 * scores plus details for the selected difficulty. Takes no props.
 *
 * Local state:
 *  - data: parsed benchmark response, read as data[difficulty]; null until loaded.
 *  - running: true while the request is in flight.
 *  - sel: the difficulty whose details are shown (initially 'medium').
 */
function BenchmarkSection() {
const [data, setData] = useState(null)
const [running, setRunning] = useState(false)
const [sel, setSel] = useState('medium')
// Fetch `${API}/benchmark`. Previous results are cleared first and `running`
// is reset in `finally`, whether the request succeeds or fails.
// NOTE(review): a non-OK response is ignored silently (data stays null) and
// exceptions are only logged to the console — no error is shown to the user.
const run = async () => {
setRunning(true); setData(null)
try {
const r = await fetch(`${API}/benchmark`)
if (r.ok) setData(await r.json())
} catch(e) { console.error(e) }
finally { setRunning(false) }
}
// Result entry for the currently selected difficulty (undefined before data loads).
const selD = data?.[sel]
// Style-object factory for a white bordered card; `ex` overrides or extends the defaults.
const card = (ex = {}) => ({
background:'#fff', border:'1px solid #e2e8f0',
borderRadius:14, padding:16, marginBottom:16, ...ex,
})
return (
<>
Heuristic Agent Benchmark
Runs the deterministic heuristic on all 4 difficulties (seed=42).
{running ? '⏳ Running…' : '▶ Run Benchmarks'}
{DIFF_ORDER.map(d => {
const score = data?.[d]?.score
const bPct = `${Math.min(100, BASELINE[d] * 100).toFixed(0)}%`
const sPct = score != null ? `${Math.min(100, score * 100).toFixed(0)}%` : '0%'
return (
)
})}
{data && (
{DIFF_ORDER.map(d => (
setSel(d)}
style={{ padding:'7px 16px', borderRadius:8, border:'none',
background: sel === d ? DIFF_COLOR[d] : DIFF_BG[d],
color: sel === d ? '#fff' : DIFF_COLOR[d],
fontWeight:700, fontSize:13, cursor:'pointer',
textTransform:'capitalize' }}>{d}
))}
{selD && !selD.error && (
Stats — {sel}
{[
{ l:'Score', v: selD.score?.toFixed(4) },
{ l:'Total Reward', v: selD.total_reward?.toFixed(3) },
{ l:'Steps', v: selD.steps },
{ l:'Tasks Done', v:`${selD.tasks_done}/${selD.tasks_total}`},
{ l:'Avg Energy', v: selD.avg_energy?.toFixed(3) },
{ l:'Deadlines', v:`${selD.deadlines_met}/${selD.deadlines_total}`},
].map(s => (
))}
{selD.components && (
<>
Score Components
>
)}
Step Rewards
Energy / Stress
`${i * 18 + 9},${80 - v * 70}`).join(' ')}
fill="none" stroke="#22c55e" strokeWidth="2" strokeLinejoin="round"/>
`${i * 18 + 9},${80 - v * 70}`).join(' ')}
fill="none" stroke="#f59e0b" strokeWidth="2"
strokeLinejoin="round" strokeDasharray="5 3"/>
─ Energy
╌ Stress
)}
)}
>
)
}
/**
 * Breakdown of score components for one benchmark result.
 *
 * @param {object} props
 * @param {object} props.components - values read by the keys in COMP_KEYS;
 *   missing or falsy values count as 0.
 * @returns The breakdown markup: each component's share of the total
 *   (0% when the total is not positive), followed by one text entry per
 *   component showing the first word of its COMP_LABELS entry and its value
 *   to 4 decimals.
 */
function ComponentBar({ components }) {
// Sum of all component values; used to turn each value into a percentage share.
const total = COMP_KEYS.reduce((s, k) => s + (components[k] || 0), 0)
return (
{COMP_KEYS.map((k, i) => {
const v = components[k] || 0
const pct = total > 0 ? (v / total) * 100 : 0
return
2 ? 2 : 0 }}/>
})}
{COMP_KEYS.map((k, i) => (
{COMP_LABELS[i].split(' ')[0]}: {(components[k] || 0).toFixed(4)}
))}
)
}
// ── Main TrainingDashboard ─────────────────────────────────────────────────────
/**
 * Top-level dashboard with two tabs: 'training' (training progress) and
 * 'benchmark'. Takes no props.
 *
 * Local state:
 *  - activeTab: id of the visible tab (initially 'training').
 *  - trainState: latest demo-training state; replaced wholesale by each
 *    message received from the training stream.
 *  - savedLog: training log loaded from `${API}/training-log`, or null.
 *  - esRef: the currently open EventSource, or null.
 */
export default function TrainingDashboard() {
const [activeTab, setActiveTab] = useState('training')
const [trainState, setTrainState] = useState({
running:false, status:'idle', current_step:0, total_steps:25,
difficulty:'medium', curve:[], before:null, after:null,
metadata:null, error:null,
})
const [savedLog, setSavedLog] = useState(null)
const esRef = useRef(null)
// Load saved training log on mount. This is best-effort: a non-OK response
// or a network error leaves savedLog unchanged.
useEffect(() => {
fetch(`${API}/training-log`)
.then(r => r.ok ? r.json() : null)
.then(d => { if (d) setSavedLog(d) })
.catch(() => {})
}, [])
// Start a 25-step demo run for `difficulty`, then subscribe to the progress stream.
// NOTE(review): the POST is awaited without try/catch and its status is not
// checked, so a failed start rejects unhandled or still opens the stream.
// NOTE(review): JSON.parse in onmessage is unguarded; a malformed event would throw.
const startTraining = async (difficulty) => {
// Ignore requests while a run is already in progress.
if (trainState.running) return
await fetch(`${API}/train/start?difficulty=${difficulty}&steps=25`, { method:'POST' })
// Close any stream left over from a previous run before opening a new one.
if (esRef.current) { esRef.current.close(); esRef.current = null }
const es = new EventSource(`${API}/train/stream`)
esRef.current = es
es.onmessage = (ev) => {
const d = JSON.parse(ev.data)
setTrainState(d)
// Terminal status: reload the saved log (best-effort) and close the stream.
if (d.status === 'completed' || d.status === 'error') {
fetch(`${API}/training-log`)
.then(r => r.ok ? r.json() : null)
.then(saved => { if (saved) setSavedLog(saved) })
.catch(() => {})
es.close(); esRef.current = null
}
}
// On a stream error, close it so the EventSource does not auto-reconnect.
es.onerror = () => { es.close(); esRef.current = null }
}
// Close any open stream when the component unmounts.
useEffect(() => () => { if (esRef.current) esRef.current.close() }, [])
// The saved log curve — show GRPO chart if it has many steps (real training data).
// "Many" means more than 100 entries.
const savedCurve = savedLog?.curve ?? []
const hasRealTrainingData = savedCurve.length > 100
const TABS = [
{ id:'training', label:'🧪 Training Progress' },
{ id:'benchmark', label:'📈 Benchmarks' },
]
return (
{TABS.map(t => (
setActiveTab(t.id)}
style={{ padding:'9px 20px', borderRadius:10, border:'none',
background: activeTab === t.id ? '#0f172a' : '#e2e8f0',
color: activeTab === t.id ? '#fff' : '#64748b',
fontWeight:700, fontSize:13, cursor:'pointer' }}>
{t.label}
))}
{activeTab === 'training' && (
<>
{/* Real GRPO training chart — shown when actual data exists */}
{hasRealTrainingData && (
<>
>
)}
{/* Demo training controls */}
{/* Scoring formula — always visible */}
{/* No data placeholder */}
{!hasRealTrainingData && trainState.status === 'idle' && (
📈
GRPO Training Charts will appear here
The full training chart + before/after comparison
(1,116 steps ) load automatically from{' '}
reward_curve.json
.
Or click ▶ medium above to run a quick 25-step demo.
)}
>
)}
{activeTab === 'benchmark' && }
)
}