// Module header: shared constants, two numeric helpers, and the GRPOTrainingChart component.
// NOTE(review): this file looks like it has had its JSX tags stripped and its lines collapsed;
// the render trees below contain only text nodes and {expressions}. Restore the markup from
// version control before changing any logic here.
//
// Constants: DIFF_ORDER lists the four difficulty keys; DIFF_COLOR, DIFF_BG, BASELINE and
// LLM_TARGET are keyed by those same difficulties. COMP_KEYS and COMP_LABELS are parallel
// five-element arrays (ComponentBar pairs them by index); COMP_COLORS also has five entries.
//
// trailingAvg(arr, w = 10): returns an array of the same length in which element i is the mean
// of arr[max(0, i - w + 1) .. i], so the first w - 1 outputs average fewer than w samples.
// arrAvg(arr): arithmetic mean of arr, or 0 for an empty array.
//
// GRPOTrainingChart({ curve }): returns null unless curve has at least 10 entries. Inside
// useMemo (keyed on curve) it derives per-step means, maxes and mins (max and min fall back to
// mean when nullish), a 10-step trailing average, the y-range, start/end/peak means, and the
// average mean over the first, middle and last thirds of the steps.
// NOTE(review): bandPts reads d.min and d.max directly rather than the mins/maxes arrays, so an
// entry without min or max bypasses the fallback to d.mean; that looks like it would produce
// NaN coordinates. Confirm against the data producer.
// NOTE(review): the inline comment above bandPts says the points come from useMemo, but
// bandPts, rawPts and smPts are computed in the component body on every render.
// NOTE(review): the phase bar labels and the summary row always prefix a plus sign, whereas the
// in-chart Start/End annotations choose the sign conditionally; negative values would show
// both signs.
// NOTE(review): phase bar height divides by maxPhase = max(early, middle, late); a zero or
// negative maximum is not guarded.
import React, { useState, useEffect, useRef, useMemo } from 'react' const API = '' const DIFF_ORDER = ['easy', 'medium', 'hard', 'expert'] const DIFF_COLOR = { easy:'#22c55e', medium:'#6366f1', hard:'#f59e0b', expert:'#ef4444' } const DIFF_BG = { easy:'#f0fdf4', medium:'#eef2ff', hard:'#fffbeb', expert:'#fef2f2' } const COMP_COLORS = ['#6366f1','#0ea5e9','#22c55e','#f59e0b','#f43f5e'] const COMP_KEYS = ['weighted_completion','deadline_adherence', 'energy_efficiency','dependency_bonus','interruption_bonus'] const COMP_LABELS = ['Weighted Completion ×0.60','Deadline Adherence ×0.22', 'Energy Efficiency ×0.10','Dependency Bonus ×0.05', 'Interruption Bonus ×0.03'] const BASELINE = { easy:0.856, medium:0.523, hard:0.301, expert:0.221 } const LLM_TARGET = { easy:0.88, medium:0.58, hard:0.37, expert:0.27 } // ── Helpers ──────────────────────────────────────────────────────────────────── function trailingAvg(arr, w = 10) { return arr.map((_, i) => { const sl = arr.slice(Math.max(0, i - w + 1), i + 1) return sl.reduce((s, v) => s + v, 0) / sl.length }) } function arrAvg(arr) { return arr.length ? arr.reduce((s, v) => s + v, 0) / arr.length : 0 } // ── GRPO Training Chart (matches the reference image) ───────────────────────── function GRPOTrainingChart({ curve }) { const data = useMemo(() => { if (!curve || curve.length < 10) return null const n = curve.length const means = curve.map(d => d.mean) const maxes = curve.map(d => d.max ?? d.mean) const mins = curve.map(d => d.min ?? 
d.mean) const sm = trailingAvg(means, 10) const yLo = Math.min(-0.01, ...mins) - 0.005 const yHi = Math.max(...maxes) + 0.01 // Phase averages const t1 = Math.floor(n / 3), t2 = Math.floor((2 * n) / 3) const early = arrAvg(means.slice(0, t1)) const middle = arrAvg(means.slice(t1, t2)) const late = arrAvg(means.slice(t2)) return { n, means, maxes, mins, sm, yLo, yHi, startMean: means[0], endMean: means[n - 1], peakMean: Math.max(...means), early, middle, late } }, [curve]) if (!data) return null const { n, means, maxes, mins, sm, yLo, yHi, startMean, endMean, peakMean, early, middle, late } = data // SVG layout const W = 880, PAD = { t: 24, r: 20, b: 38, l: 52 } const cW = W - PAD.l - PAD.r const H1 = 220 const cH = H1 - PAD.t - PAD.b const x = i => PAD.l + (i / Math.max(n - 1, 1)) * cW const y = v => PAD.t + cH - ((v - yLo) / (yHi - yLo)) * cH const fp = v => v.toFixed(1) // Polyline points (generated once via useMemo already) const bandPts = curve.map((d, i) => `${fp(x(i))},${fp(y(d.min))}`).join(' ') + ' ' + [...curve].reverse().map((d, i) => `${fp(x(n - 1 - i))},${fp(y(d.max))}`).join(' ') const rawPts = means.map((v, i) => `${fp(x(i))},${fp(y(v))}`).join(' ') const smPts = sm.map((v, i) => `${fp(x(i))},${fp(y(v))}`).join(' ') const y0 = y(0) const yStart = y(startMean) // Y-axis ticks const yTicks = [] for (let v = -0.10; v <= yHi + 0.01; v = Math.round((v + 0.05) * 100) / 100) { if (v >= yLo) yTicks.push(v) } // X-axis ticks (every 200 or 100 steps) const xStep = n > 500 ? 
200 : 100 const xTicks = [] for (let i = 0; i < n; i += xStep) xTicks.push(i) if (xTicks[xTicks.length - 1] !== n - 1) xTicks.push(n - 1) const phases = [ { label: 'Early', sub: '(first 1/3)', val: early, fill: '#fca5a5' }, { label: 'Middle', sub: '(second 1/3)', val: middle, fill: '#fcd34d' }, { label: 'Late', sub: '(final 1/3)', val: late, fill: '#86efac' }, ] const maxPhase = Math.max(early, middle, late) // Phase bar SVG dimensions const BW = W, BH = 130 const BPAD = { t: 10, b: 44, l: PAD.l, r: PAD.r } const bcW = BW - BPAD.l - BPAD.r const bcH = BH - BPAD.t - BPAD.b const barSlot = bcW / phases.length const barW = barSlot * 0.52 return (
{/* Chart title */}
⚡ StressTest — GRPO Training Reward Curve
Cognitive Load Manager · Meta OpenEnv Hackathon
Episode Reward Over Training (mean ± range per step)
{/* ── Top chart SVG ── */} {/* Y-axis grid + labels */} {yTicks.map(v => ( {v >= 0 ? `+${v.toFixed(2)}` : v.toFixed(2)} ))} {/* min/max band */} `${fp(x(i))},${fp(y(v))}`).join(' ')} fill="none" stroke="#6366f130" strokeWidth="0.7"/> `${fp(x(i))},${fp(y(v))}`).join(' ')} fill="none" stroke="#6366f130" strokeWidth="0.7"/> {/* Zero baseline (red dashed) */} {y0 >= PAD.t && y0 <= PAD.t + cH && ( )} {/* Start reward baseline (gray dotted) */} {/* Raw mean (thin) */} {/* Smoothed mean (thick) */} {/* X-axis ticks */} {xTicks.map(i => ( {i} ))} {/* X-axis label */} Training Step {/* Start annotation */} Start: {startMean >= 0 ? '+' : ''}{startMean.toFixed(4)} {/* End annotation */} End: {endMean >= 0 ? '+' : ''}{endMean.toFixed(4)} {/* Chart border */} {/* Legend */}
{[ { color:'#6366f112', stroke:'#6366f130', label:'min/max range', band:true }, { color:'#818cf8', label:'raw mean', opacity:0.5 }, { color:'#1d4ed8', label:'smoothed (window=10)', bold:true }, { color:'#ef4444', label:'zero baseline', dash:true }, { color:'#94a3b8', label:'start reward', dash:true }, ].map(l => ( {l.band ? ( ) : ( )} {l.label} ))}
{/* Phase bar subtitle */}
Average Reward by Training Phase (Early → Late shows improvement)
{/* ── Phase bar chart SVG ── */} {phases.map((p, i) => { const bH = (p.val / maxPhase) * bcH const bX = BPAD.l + i * barSlot + (barSlot - barW) / 2 const bY = BPAD.t + bcH - bH return ( {/* Bar */} {/* Value label above bar */} +{p.val.toFixed(4)} {/* Phase label */} {p.label} {p.sub} ) })} {/* Y=0 baseline */} {/* Summary stats row */}
{[ { l:'Total Steps', v: n.toLocaleString(), c:'#6366f1' }, { l:'Start', v: `+${startMean.toFixed(4)}`, c:'#6b7280' }, { l:'End', v: `+${endMean.toFixed(4)}`, c:'#1d4ed8' }, { l:'Total Gain', v: `+${(endMean-startMean).toFixed(4)}`, c:'#16a34a' }, { l:'Peak Mean', v: `+${peakMean.toFixed(4)}`, c:'#22c55e' }, ].map(s => (
{s.l}
{s.v}
))}
// The leading tokens on the next line close GRPOTrainingChart; BeforeAfterComparisonChart
// starts after them.
// BeforeAfterComparisonChart({ curve }): compares the first N and the last N entries of curve,
// where N = min(120, floor(curve.length / 5)). The means of each slice are smoothed with
// trailingAvg (window 8) and both series share one y-range. avgB and avgA are the slice means,
// gain = avgA - avgB, and gainPct is gain relative to the absolute value of avgB (the infinity
// sign is shown when avgB is exactly 0).
// NOTE(review): curve is dereferenced without a null or length check, and N is 0 when curve
// has fewer than 5 entries, which leaves allVals empty. Call sites are not visible in this
// view; confirm they guarantee a long enough curve.
// NOTE(review): the stat chips always prefix a plus sign, including for a negative gain.
// NOTE(review): the legend text uses N and curve.length as the upper step bounds, while the
// slices actually end at index N - 1 and curve.length - 1.
) } // ── Before vs After Training — single overlay chart ─────────────────────────── function BeforeAfterComparisonChart({ curve }) { const N = Math.min(120, Math.floor(curve.length / 5)) const beforeSlice = curve.slice(0, N) const afterSlice = curve.slice(curve.length - N) const bMeans = beforeSlice.map(d => d.mean) const aMeans = afterSlice.map(d => d.mean) const smB = trailingAvg(bMeans, 8) const smA = trailingAvg(aMeans, 8) const allVals = [...bMeans, ...aMeans] const yLo = Math.min(-0.01, ...allVals) - 0.005 const yHi = Math.max(...allVals) + 0.015 const W = 880, PAD = { t:28, r:20, b:40, l:54 }, H = 200 const cW = W - PAD.l - PAD.r const cH = H - PAD.t - PAD.b const x = i => PAD.l + (i / Math.max(N - 1, 1)) * cW const y = v => PAD.t + cH - ((v - yLo) / (yHi - yLo)) * cH const fp = v => v.toFixed(1) const bSmPts = smB.map((v, i) => `${fp(x(i))},${fp(y(v))}`).join(' ') const aSmPts = smA.map((v, i) => `${fp(x(i))},${fp(y(v))}`).join(' ') const bRawPts = bMeans.map((v, i) => `${fp(x(i))},${fp(y(v))}`).join(' ') const aRawPts = aMeans.map((v, i) => `${fp(x(i))},${fp(y(v))}`).join(' ') // Filled area under each smoothed line const bFill = `${fp(x(0))},${fp(y(yLo))} ${bSmPts} ${fp(x(N-1))},${fp(y(yLo))}` const aFill = `${fp(x(0))},${fp(y(yLo))} ${aSmPts} ${fp(x(N-1))},${fp(y(yLo))}` const avgB = arrAvg(bMeans), avgA = arrAvg(aMeans) const gain = avgA - avgB const gainPct = avgB !== 0 ? ((gain / Math.abs(avgB)) * 100).toFixed(1) : '∞' const yTicks = [] for (let v = -0.05; v <= yHi + 0.01; v = Math.round((v + 0.05) * 100) / 100) { if (v >= yLo) yTicks.push(v) } const xLabels = [0, Math.floor(N/4), Math.floor(N/2), Math.floor(3*N/4), N-1] return (
{/* Header row */}
📊 Before vs After Training — Reward Comparison
First {N} steps (pre-convergence) vs Last {N} steps (post-GRPO) — both smoothed with window=8
{/* Stat chips */}
{[ { label:'Before avg', val:`+${avgB.toFixed(4)}`, bg:'#fff1f2', fg:'#e11d48', border:'#fecdd3' }, { label:'After avg', val:`+${avgA.toFixed(4)}`, bg:'#f0fdf4', fg:'#15803d', border:'#bbf7d0' }, { label:'Gain', val:`+${gain.toFixed(4)}`, bg:'#eff6ff', fg:'#1d4ed8', border:'#bfdbfe' }, { label:'Improvement', val:`${gainPct}%`, bg:'#fdf4ff', fg:'#7e22ce', border:'#e9d5ff' }, ].map(s => (
{s.label}
{s.val}
))}
{/* Chart SVG */} {/* Grid + Y ticks */} {yTicks.map(v => ( {v >= 0 ? `+${v.toFixed(2)}` : v.toFixed(2)} ))} {/* Zero line */} {/* Filled areas */} {/* Raw lines (thin, behind) */} {/* Smoothed lines (prominent) */} {/* Average reference lines */} {/* Avg labels on right */} avg avg {/* Start/end dots */} {/* X-axis */} {xLabels.map(i => ( {i} ))} Steps (relative within window) {/* Chart border */} {/* Legend */}
Before Training (steps 0–{N}) After Training (steps {curve.length - N}–{curve.length}) Phase average
// The leading tokens on the next line close BeforeAfterComparisonChart.
// FORMULA_ITEMS: the five scoring components, each with key, label, weight, color and desc.
// The weights sum to 1.00 (0.60 + 0.22 + 0.10 + 0.05 + 0.03).
// NOTE(review): these weights repeat the multipliers embedded in the COMP_LABELS strings;
// nothing visible here keeps the two in sync.
// ScoringFormulaCard(): takes no props and renders everything from FORMULA_ITEMS: a stacked
// weight bar, one card per item, and the formula text.
// In the stacked bar a percentage is printed only for weights >= 0.10, and the label under the
// bar (the first word of the item label) only for weights >= 0.08.
// In the formula text the dependency key is abbreviated to dep; every other item is printed
// under a name equal to its key.
) } // ── Scoring Formula Card (standalone, visually rich) ───────────────────────── const FORMULA_ITEMS = [ { key:'completion', label:'Task Completion', weight:0.60, color:'#6366f1', desc:'Fraction of tasks fully completed, weighted by priority' }, { key:'deadline', label:'Deadline Adherence', weight:0.22, color:'#0ea5e9', desc:'Bonus for finishing before deadline; penalty for missing it' }, { key:'energy', label:'Energy Efficiency', weight:0.10, color:'#22c55e', desc:'Penalises high worker fatigue and stress spikes' }, { key:'dependency', label:'Dependency Bonus', weight:0.05, color:'#f59e0b', desc:'Reward for respecting task dependency order' }, { key:'interrupt', label:'Interruption Bonus', weight:0.03, color:'#f43f5e', desc:'Reward for minimising context-switching interruptions' }, ] function ScoringFormulaCard() { return (
{/* Title */}
🏆 Reward Scoring Formula
Each action is scored on 5 dimensions. Weights reflect cognitive-load research priorities.
{/* Stacked weight bar */}
Weight distribution
{FORMULA_ITEMS.map(it => (
{it.weight >= 0.10 && ( {(it.weight * 100).toFixed(0)}% )}
))}
{/* Labels under bar */}
{FORMULA_ITEMS.map(it => (
{it.weight >= 0.08 ? it.label.split(' ')[0] : ''}
))}
{/* Component cards */}
{FORMULA_ITEMS.map(it => (
{/* Coloured top accent bar */}
{/* Weight badge */}
×{it.weight.toFixed(2)}
{it.label}
{it.desc}
))}
{/* Formula expression */}
Formula
score = {FORMULA_ITEMS.map((it, idx) => ( {it.key === 'completion' ? 'completion' : it.key === 'deadline' ? 'deadline' : it.key === 'energy' ? 'energy' : it.key === 'dependency' ? 'dep' : 'interrupt'}×{it.weight} {idx < FORMULA_ITEMS.length - 1 && + } ))} ∈ (0.01, 0.99)
// The leading tokens on the next line close ScoringFormulaCard. This span holds LineChart,
// BandChart and BeforeAfterBars.
// LineChart({ data, color, height }): shows the No data yet placeholder when data is missing
// or empty. Otherwise point i sits at x = i * 18 + 9, W is max(data.length * 18, 300), and
// values are scaled between the data minimum and maximum (the span is treated as 1 when all
// values are equal). The filled polygon appends a point at y = height under the last sample.
// BandChart({ curve, height }): same layout for an array of entries with mean, max and min;
// max and min fall back to mean when nullish. The band outline walks mins left to right and
// then maxes right to left.
// BeforeAfterBars({ before, after }): shows a placeholder when both are falsy. Otherwise, for
// each DIFF_ORDER entry, the before, after and LLM_TARGET values are turned into percentage
// strings capped at 100 (0% when a value is missing), and a delta line is shown when both
// before and after exist.
// NOTE(review): the delta arrow and colour use a strict av > bv test, so equal scores show the
// down arrow.
) } // ── Tiny SVG charts (used by benchmark section) ──────────────────────────────── function LineChart({ data, color = '#6366f1', height = 120 }) { if (!data || !data.length) return (
No data yet
) const W = Math.max(data.length * 18, 300) const lo = Math.min(...data), hi = Math.max(...data) const sp = hi === lo ? 1 : hi - lo const py = v => (height - 14) - ((v - lo) / sp) * (height - 26) + 7 const pts = data.map((v, i) => `${i * 18 + 9},${py(v)}`).join(' ') return ( `${i * 18 + 9},${py(v)}`), `${(data.length - 1) * 18 + 9},${height}`].join(' ')} fill={color + '18'} stroke="none"/> ) } // Small band chart (used by demo training live view) function BandChart({ curve, height = 140 }) { if (!curve || !curve.length) return (
Training data will appear here
) const means = curve.map(d => d.mean) const maxes = curve.map(d => d.max ?? d.mean) const mins = curve.map(d => d.min ?? d.mean) const W = Math.max(curve.length * 18, 300) const lo = Math.min(...mins), hi = Math.max(...maxes) const sp = hi === lo ? 1 : hi - lo const py = v => (height - 14) - ((v - lo) / sp) * (height - 26) + 7 const bandPts = [ ...mins.map((v, i) => `${i * 18 + 9},${py(v)}`), ...[...maxes].reverse().map((v, i) => `${(curve.length - 1 - i) * 18 + 9},${py(v)}`), ].join(' ') const meanPts = means.map((v, i) => `${i * 18 + 9},${py(v)}`).join(' ') return ( `${i * 18 + 9},${py(v)}`).join(' ')} fill="none" stroke="#6366f140" strokeWidth="1"/> `${i * 18 + 9},${py(v)}`).join(' ')} fill="none" stroke="#6366f140" strokeWidth="1"/> ) } // ── Before/After grouped bar ─────────────────────────────────────────────────── function BeforeAfterBars({ before, after }) { if (!before && !after) return (
Run demo training to see before/after comparison
) return (
{DIFF_ORDER.map(d => { const bv = before?.[d] ?? null const av = after?.[d] ?? null const bPct = bv != null ? `${Math.min(100, bv * 100).toFixed(0)}%` : '0%' const aPct = av != null ? `${Math.min(100, av * 100).toFixed(0)}%` : '0%' const tPct = `${Math.min(100, LLM_TARGET[d] * 100).toFixed(0)}%` return (
{d}
{av != null && bv != null && (
bv ? '#16a34a' : '#ef4444' }}> {av > bv ? '▲' : '▼'}  {av > bv ? '+' : ''}{(av - bv).toFixed(4)} vs before
)}
) })}
// The leading tokens on the next line close BeforeAfterBars. This span holds BarRow and
// SectionHeader.
// BarRow({ label, pct, color, val, dashed, glow }): only label and val survive in the visible
// render residue; how pct, color, dashed and glow are applied cannot be seen from here.
// SectionHeader({ children, action }): renders children followed by action.
) } function BarRow({ label, pct, color, val, dashed, glow }) { return (
{label} {val}
) } function SectionHeader({ children, action }) { return (
{children}
{action}
// The leading tokens on the next line close SectionHeader.
// DemoTrainingProgress({ state, onStart }): presentational view of the demo training state
// object (running, status, current_step, total_steps, curve, before, after, metadata, error).
// pct is the rounded completion percentage (0 when total_steps is not positive); lastEntry is
// the final curve entry or null; meanTrace is the list of per-step means.
// Status text is shown for any status other than idle; the stat row while running or
// completed; the live curve once at least one point exists; the before/after section only on
// completed with before or after present; the start hint only while idle.
// NOTE(review): onStart is not referenced in the visible residue; presumably it is wired to
// the medium/hard/expert buttons whose markup is missing. Confirm against the original file.
// NOTE(review): Last Max calls lastEntry.max.toFixed directly, while the chart helpers treat
// max as optional (max ?? mean); an entry without max would throw here.
) } // ── Demo training progress section ──────────────────────────────────────────── function DemoTrainingProgress({ state, onStart }) { const { running, status, current_step, total_steps, curve, before, after, metadata, error } = state const pct = total_steps > 0 ? Math.round((current_step / total_steps) * 100) : 0 const lastEntry = curve && curve.length ? curve[curve.length - 1] : null const meanTrace = (curve || []).map(d => d.mean) return (
🧪 Demo Training — Random → Heuristic Agent
Simulates GRPO reward progression on the HF Space (no GPU required). Runs {total_steps} steps. Results saved to{' '} reward_curve.json.
{['medium','hard','expert'].map(d => ( ))}
{status !== 'idle' && (
{status==='running' && `⚡ Training… step ${current_step}/${total_steps}`} {status==='completed' && `✅ Training complete — ${total_steps} steps`} {status==='error' && `❌ ${error}`} {pct}%
)} {(running || status==='completed') && (
{[ { l:'Step', v:`${current_step}/${total_steps}`, c:'#6366f1' }, { l:'Last Mean', v: lastEntry ? lastEntry.mean.toFixed(4) : '—', c: lastEntry && lastEntry.mean >= 0 ? '#16a34a':'#ef4444' }, { l:'Last Max', v: lastEntry ? lastEntry.max.toFixed(4) : '—', c:'#22c55e' }, { l:'Difficulty', v: metadata?.difficulty ?? '—', c:'#0ea5e9' }, ].map(s => (
{s.l}
{s.v}
))}
)} {meanTrace.length > 0 && (
Live Reward Curve (mean ± band)
)} {/* Before/After when demo completes */} {status === 'completed' && (before || after) && (
Before vs After — Score Comparison
)} {status === 'idle' && (
Click ▶ medium / ▶ hard / ▶ expert to start demo training.
)}
// The leading tokens on the next line close DemoTrainingProgress.
// BenchmarkSection(): local state is data (benchmark response or null), running, and sel (the
// selected difficulty, initially medium).
// run(): sets running and clears data, fetches API + /benchmark, stores the parsed JSON only
// when the response is ok, logs thrown errors to the console, and always resets running.
// NOTE(review): a non-ok response leaves data null and no error state is recorded, so the
// user gets no feedback about the failure from the visible logic.
// selD is the entry for the selected difficulty; its detail view is rendered only when it
// exists and has no error field. card(ex) returns the shared card style with ex spread last so
// callers can override individual properties.
// Per difficulty, the BASELINE value and the fetched score are converted to percentage
// strings capped at 100 (0% while no score is available).
) } // ── Benchmark section ────────────────────────────────────────────────────────── function BenchmarkSection() { const [data, setData] = useState(null) const [running, setRunning] = useState(false) const [sel, setSel] = useState('medium') const run = async () => { setRunning(true); setData(null) try { const r = await fetch(`${API}/benchmark`) if (r.ok) setData(await r.json()) } catch(e) { console.error(e) } finally { setRunning(false) } } const selD = data?.[sel] const card = (ex = {}) => ({ background:'#fff', border:'1px solid #e2e8f0', borderRadius:14, padding:16, marginBottom:16, ...ex, }) return ( <>
Heuristic Agent Benchmark

Runs the deterministic heuristic on all 4 difficulties (seed=42).

{DIFF_ORDER.map(d => { const score = data?.[d]?.score const bPct = `${Math.min(100, BASELINE[d] * 100).toFixed(0)}%` const sPct = score != null ? `${Math.min(100, score * 100).toFixed(0)}%` : '0%' return (
{d}
) })}
{data && (
{DIFF_ORDER.map(d => ( ))}
{selD && !selD.error && (
Stats — {sel}
{[ { l:'Score', v: selD.score?.toFixed(4) }, { l:'Total Reward', v: selD.total_reward?.toFixed(3) }, { l:'Steps', v: selD.steps }, { l:'Tasks Done', v:`${selD.tasks_done}/${selD.tasks_total}`}, { l:'Avg Energy', v: selD.avg_energy?.toFixed(3) }, { l:'Deadlines', v:`${selD.deadlines_met}/${selD.deadlines_total}`}, ].map(s => (
{s.l}
{s.v}
))}
{selD.components && ( <> Score Components )}
Step Rewards
Energy / Stress
`${i * 18 + 9},${80 - v * 70}`).join(' ')} fill="none" stroke="#22c55e" strokeWidth="2" strokeLinejoin="round"/> `${i * 18 + 9},${80 - v * 70}`).join(' ')} fill="none" stroke="#f59e0b" strokeWidth="2" strokeLinejoin="round" strokeDasharray="5 3"/>
Energy  Stress
)}
// The leading tokens on the next line close BenchmarkSection.
// ComponentBar({ components }): sums the values under COMP_KEYS (missing or falsy values count
// as 0) and computes each key's share as a percentage of that total (0 when the total is not
// positive). Below the bar it lists the first word of each COMP_LABELS entry with the matching
// component value formatted to 4 decimals.
)} ) } function ComponentBar({ components }) { const total = COMP_KEYS.reduce((s, k) => s + (components[k] || 0), 0) return (
{COMP_KEYS.map((k, i) => { const v = components[k] || 0 const pct = total > 0 ? (v / total) * 100 : 0 return
2 ? 2 : 0 }}/> })}
{COMP_KEYS.map((k, i) => ( {COMP_LABELS[i].split(' ')[0]}: {(components[k] || 0).toFixed(4)} ))}
// The leading tokens on the next line close ComponentBar.
// TrainingDashboard(): default export; top-level view with two tabs (training, benchmark).
// On mount it fetches API + /training-log and stores the JSON in savedLog; failures are
// deliberately ignored.
// startTraining(difficulty): does nothing while trainState.running is true. Otherwise it POSTs
// to /train/start with steps=25, closes any previous EventSource, opens /train/stream and
// replaces trainState with each parsed message. When a message reports completed or error it
// refetches the training log and closes the stream. A second effect closes the stream on
// unmount.
// hasRealTrainingData is true when the saved curve has more than 100 entries.
// NOTE(review): the POST is awaited without try/catch or an ok check, so a rejected fetch
// becomes an unhandled rejection and a failed start still opens the stream.
// NOTE(review): JSON.parse(ev.data) is unguarded, and es.onerror closes the stream without
// updating trainState, so a connection dropped mid-run appears to leave running set to true.
// NOTE(review): the running guard reads trainState from the render closure; two quick clicks
// before the first stream message could both pass it.
// NOTE(review): the placeholder copy hard-codes 1,116 steps.
) } // ── Main TrainingDashboard ───────────────────────────────────────────────────── export default function TrainingDashboard() { const [activeTab, setActiveTab] = useState('training') const [trainState, setTrainState] = useState({ running:false, status:'idle', current_step:0, total_steps:25, difficulty:'medium', curve:[], before:null, after:null, metadata:null, error:null, }) const [savedLog, setSavedLog] = useState(null) const esRef = useRef(null) // Load saved training log on mount useEffect(() => { fetch(`${API}/training-log`) .then(r => r.ok ? r.json() : null) .then(d => { if (d) setSavedLog(d) }) .catch(() => {}) }, []) const startTraining = async (difficulty) => { if (trainState.running) return await fetch(`${API}/train/start?difficulty=${difficulty}&steps=25`, { method:'POST' }) if (esRef.current) { esRef.current.close(); esRef.current = null } const es = new EventSource(`${API}/train/stream`) esRef.current = es es.onmessage = (ev) => { const d = JSON.parse(ev.data) setTrainState(d) if (d.status === 'completed' || d.status === 'error') { fetch(`${API}/training-log`) .then(r => r.ok ? r.json() : null) .then(saved => { if (saved) setSavedLog(saved) }) .catch(() => {}) es.close(); esRef.current = null } } es.onerror = () => { es.close(); esRef.current = null } } useEffect(() => () => { if (esRef.current) esRef.current.close() }, []) // The saved log curve — show GRPO chart if it has many steps (real training data) const savedCurve = savedLog?.curve ?? [] const hasRealTrainingData = savedCurve.length > 100 const TABS = [ { id:'training', label:'🧪 Training Progress' }, { id:'benchmark', label:'📈 Benchmarks' }, ] return (
{TABS.map(t => ( ))}
{activeTab === 'training' && ( <> {/* Real GRPO training chart — shown when actual data exists */} {hasRealTrainingData && ( <> )} {/* Demo training controls */} {/* Scoring formula — always visible */} {/* No data placeholder */} {!hasRealTrainingData && trainState.status === 'idle' && (
📈
GRPO Training Charts will appear here
The full training chart + before/after comparison (1,116 steps) load automatically from{' '} reward_curve.json .
Or click ▶ medium above to run a quick 25-step demo.
)} )} {activeTab === 'benchmark' && }
) }