/**
 * Renders a fixed ten-slot reward trace for an episode together with the
 * cumulative reward and the most recent step reward.
 *
 * @param {object} props
 * @param {Array} [props.trace=[]] - Per-step reward values. Only entries whose
 *   `typeof` is 'number' are treated as recorded; any other slot is titled
 *   "pending".
 * @param {number} [props.cumReward=0] - Cumulative reward, shown with two
 *   decimals. `.toFixed` is called on it directly, so it must be a number.
 * @param {?number} [props.lastReward=null] - Reward of the latest step, shown
 *   signed with three decimals; `null` is shown as an em dash.
 * @returns presumably a React element — NOTE(review): the markup tags are not
 *   visible in this view, confirm against the original file.
 */
export default function RewardTrace({ trace = [], cumReward = 0, lastReward = null }) {
// Smallest and largest value in the trace. The fold is seeded with 0, so the
// resulting range always contains zero, even for all-positive or all-negative
// traces.
const bounds = trace.length
? trace.reduce(
(acc, v) => ({ min: Math.min(acc.min, v), max: Math.max(acc.max, v) }),
{ min: 0, max: 0 },
)
: { min: 0, max: 0 }
// Widen the range so it covers at least [-0.5, 1.0].
const lo = Math.min(bounds.min, -0.5)
const hi = Math.max(bounds.max, 1.0)
// Floor keeps the divisor below strictly positive.
const span = Math.max(0.1, hi - lo)
// Fraction of the [lo, hi] range that lies below zero.
const zeroFrac = (0 - lo) / span
// Bucket for the latest reward: '' when there is none, 'good' above 0.5,
// 'warn' above 0, 'bad' otherwise (zero and negatives).
// NOTE(review): where `cls` is consumed is not visible in this view.
const cls =
lastReward === null ? '' :
lastReward > 0.5 ? 'good' :
lastReward > 0 ? 'warn' :
'bad'
// NOTE(review): the element tags in the markup below appear to be missing from
// this view; only text and expression fragments remain. Confirm against the
// original file before editing this section.
return (
Reward Trace · Episode
step={trace.length}/10
cum={cumReward.toFixed(2)}
{Array.from({ length: 10 }).map((_, i) => {
// Always iterate ten slots; indexes past the end of `trace` read undefined.
const v = trace[i]
// A slot counts as filled only when it holds a number.
const has = typeof v === 'number'
// Positive reward as a share of `hi`, negative magnitude as a share of
// `-lo`; both capped at 1.
const upFrac = has && v > 0 ? Math.min(1, v / Math.max(hi, 0.01)) : 0
const downFrac = has && v < 0 ? Math.min(1, -v / Math.max(-lo, 0.01)) : 0
// Scale each share by the portion of the range above / below zero and format
// it as a whole-number percentage string.
// NOTE(review): presumably used as bar sizes — the consuming markup is not
// visible here.
const upPct = `${(upFrac * 100 * (1 - zeroFrac)).toFixed(0)}%`
const downPct = `${(downFrac * 100 * zeroFrac).toFixed(0)}%`
return (
= 0 ? 'pos' : 'neg') : 'empty'}`}
title={has ? `R${i + 1}: reward ${v >= 0 ? '+' : ''}${v.toFixed(3)}` : `R${i + 1}: pending`}
>
{i + 1}
)
})}
last_step
{lastReward === null ? '—' : `${lastReward >= 0 ? '+' : ''}${Number(lastReward).toFixed(3)}`}
)
}