/**
 * RewardTrace: component that renders a fixed row of 10 per-step reward
 * slots plus a header (step count, cumulative reward) and a "last_step"
 * readout.
 *
 * NOTE(review): in this copy of the file the JSX element tags appear to be
 * missing (only text nodes and attribute tails remain; e.g. the fragment
 * starting with "= 0 ? 'pos' : 'neg')" looks like the end of a className
 * expression). Confirm against version control before editing the markup.
 *
 * @param {object} props
 * @param {number[]} [props.trace=[]] Per-step rewards. Only indices 0..9 get
 *   a slot, but every entry contributes to the min/max scale and to the
 *   "step=" count shown in the header.
 * @param {number} [props.cumReward=0] Cumulative reward, shown with 2 decimals.
 * @param {number|null} [props.lastReward=null] Most recent reward; null is
 *   shown as an em dash, otherwise a signed value with 3 decimals.
 *
 * Scaling:
 *  - bounds: min/max over trace, seeded with 0 so the range always
 *    contains zero.
 *  - lo / hi: the range is widened to at least [-0.5, 1.0]. Consequently
 *    span is always >= 1.5, so the 0.1 floor on span and the 0.01 floors
 *    on the hi / -lo divisors never take effect.
 *  - zeroFrac: fraction of the span that lies below zero; (1 - zeroFrac)
 *    is the fraction above zero.
 *
 * Per slot i:
 *  - has: true only when trace[i] is of type number; otherwise the slot is
 *    treated as empty and titled "R{i+1}: pending".
 *  - upFrac / downFrac: the value's magnitude relative to hi (positive) or
 *    -lo (negative), clamped to 1; at most one of them is non-zero.
 *  - upPct / downPct: those fractions scaled by the share of the span above
 *    / below zero and formatted as whole-number percent strings
 *    (presumably used as bar heights; the consuming markup is not visible
 *    here).
 *
 * cls: '' when lastReward is null, 'good' when > 0.5, 'warn' when > 0,
 * otherwise 'bad' (its use is not visible in this view; presumably a class
 * name on the last_step readout).
 */
export default function RewardTrace({ trace = [], cumReward = 0, lastReward = null }) { const bounds = trace.length ? trace.reduce( (acc, v) => ({ min: Math.min(acc.min, v), max: Math.max(acc.max, v) }), { min: 0, max: 0 }, ) : { min: 0, max: 0 } const lo = Math.min(bounds.min, -0.5) const hi = Math.max(bounds.max, 1.0) const span = Math.max(0.1, hi - lo) const zeroFrac = (0 - lo) / span const cls = lastReward === null ? '' : lastReward > 0.5 ? 'good' : lastReward > 0 ? 'warn' : 'bad' return (
Reward Trace · Episode
step={trace.length}/10 cum={cumReward.toFixed(2)}
{Array.from({ length: 10 }).map((_, i) => { const v = trace[i] const has = typeof v === 'number' const upFrac = has && v > 0 ? Math.min(1, v / Math.max(hi, 0.01)) : 0 const downFrac = has && v < 0 ? Math.min(1, -v / Math.max(-lo, 0.01)) : 0 const upPct = `${(upFrac * 100 * (1 - zeroFrac)).toFixed(0)}%` const downPct = `${(downFrac * 100 * zeroFrac).toFixed(0)}%` return (
= 0 ? 'pos' : 'neg') : 'empty'}`} title={has ? `R${i + 1}: reward ${v >= 0 ? '+' : ''}${v.toFixed(3)}` : `R${i + 1}: pending`} >
{i + 1}
) })}
last_step {lastReward === null ? '—' : `${lastReward >= 0 ? '+' : ''}${Number(lastReward).toFixed(3)}`}
) }