"use client"; import { useState, useCallback, useRef, useEffect } from "react"; import { motion, AnimatePresence } from "framer-motion"; import { ShieldAlert, Eye, Sparkles, RotateCcw, Play, Square } from "lucide-react"; import { trustColor } from "../lib/theme"; import type { AutoPolicy, StepResult, TaskType, Observation, EventItem } from "../lib/types"; type JudgePhase = 0 | 1 | 2 | 3; interface PhaseResult { score: number; detections: number; poisonings: number; steps: number; finalTrust: Record; events: EventItem[]; } const STEPS = [ { icon: ShieldAlert, num: "Step 1 of 3", title: "Show the Failure", desc: "The orchestrator delegates blindly using a random policy. No trust model. No verification. Watch as adversarial agents poison the mission unchecked.", btnLabel: "Run Random Policy", color: "var(--red)", }, { icon: Eye, num: "Step 2 of 3", title: "Show the Recovery", desc: "Now the orchestrator uses behavioral trust. It routes to trusted specialists, triggers verification when stakes are high, and catches adversarial attempts before they cascade.", btnLabel: "Run Heuristic Policy", color: "var(--green)", }, { icon: Sparkles, num: "Step 3 of 3", title: "Prove Generalization", desc: "Hidden profiles are reshuffled. The adversarial agent moves to a different slot. The orchestrator re-learns trust from scratch — proving this is a skill, not memorized identity.", btnLabel: "Swap Profiles & Replay", color: "var(--accent)", }, ]; export default function JudgeWizard({ autoRun, resetEpisode, swapProfiles, observation, events, info, running: globalRunning, }: { autoRun: (policy: AutoPolicy) => Promise; resetEpisode: (task?: TaskType, seed?: number) => Promise; swapProfiles: () => Promise; observation: Observation | null; events: EventItem[]; info: StepResult["info"] | undefined; running: boolean; }) { const [phase, setPhase] = useState(0); const [running, setRunning] = useState(false); const [results, setResults] = useState<(PhaseResult | null)[]>([null, null, null]); const eventsRef = useRef(events); const infoRef = useRef(info); const obsRef = useRef(observation); useEffect(() => { eventsRef.current = events; }, [events]); useEffect(() => { infoRef.current = info; }, [info]); useEffect(() => { obsRef.current = observation; }, [observation]); const captureResult = useCallback((): PhaseResult => ({ score: infoRef.current?.score ?? 0, detections: infoRef.current?.adversarial_detections ?? 0, poisonings: infoRef.current?.adversarial_poisonings ?? 0, steps: infoRef.current?.step_count ?? 0, finalTrust: obsRef.current?.trust_snapshot ?? {}, events: [...eventsRef.current], }), []); const runPhase = useCallback(async () => { setRunning(true); try { if (phase === 0) { await resetEpisode(); await autoRun("random"); const r = captureResult(); setResults((p) => { const n = [...p]; n[0] = r; return n; }); setPhase(1); } else if (phase === 1) { await resetEpisode(); await autoRun("heuristic"); const r = captureResult(); setResults((p) => { const n = [...p]; n[1] = r; return n; }); setPhase(2); } else if (phase === 2) { await swapProfiles(); await autoRun("trained"); const r = captureResult(); setResults((p) => { const n = [...p]; n[2] = r; return n; }); setPhase(3); } } finally { setRunning(false); } }, [phase, autoRun, resetEpisode, swapProfiles, captureResult]); const restart = () => { setPhase(0); setResults([null, null, null]); }; const currentStep = Math.min(phase, 2); const step = STEPS[currentStep]; const Icon = step.icon; const isRunning = running || globalRunning; // Live trust data during run const trustEntries = observation ? Object.entries(observation.trust_snapshot).sort(([a], [b]) => a.localeCompare(b)) : []; return (
{/* progress dots */}
{[0, 1, 2].map((i) => (
i ? "done" : phase === i ? "active" : ""}`} /> {i < 2 &&
i ? "done" : ""}`} />}
))}
{/* main stage */} {phase < 3 ? ( <>
{step.num}

{step.title}

{step.desc}

{/* Show PREVIOUS result if we have one (comparison view) */} {phase === 1 && results[0] && (
Previous: Random Policy Result
)} {phase === 2 && results[0] && results[1] && (
Before vs After Comparison

Random (Blind)

{results[0].score.toFixed(3)}
{results[0].poisonings} poisonings · {results[0].detections} detections

Heuristic (Trust)

{results[1].score.toFixed(3)}
{results[1].poisonings} poisonings · {results[1].detections} detections
)} ) : ( /* completion screen */ <>
Demo Complete

Trust Calibration Works

Across all three runs, the orchestrator learned to identify and route around adversarial agents — even when specialist identities were reshuffled.

{/* three-way comparison */}
{["Random", "Heuristic", "After Swap"].map((label, i) => { const r = results[i]; return (
{label}
{r ? r.score.toFixed(3) : "—"}
); })}
{/* Final trust comparison */} {results[1] && results[2] && (
Heuristic Final Trust
After Swap Final Trust
)}
)}
{/* Live data panel - shows during runs */} {isRunning && observation && (
Live Trust Scores
Live Events
{events.slice(-8).reverse().map((ev, i) => (
#{ev.step}{" "} {ev.action}{ev.specialist ? `:${ev.specialist}` : ""} = 0.5 ? "var(--green)" : "var(--red)", }}> {ev.outcome === "reset" ? "—" : ev.reward.toFixed(2)}
))}
)}
); } /* ── helper components ─────────────────────────────── */ function TrustBars({ trust }: { trust: Record }) { const entries = Object.entries(trust).sort(([a], [b]) => a.localeCompare(b)); return (
{entries.map(([id, val]) => (
{id}
{val.toFixed(2)}
))}
); } function PhaseResultCard({ result, variant }: { result: PhaseResult; variant: "bad" | "good" }) { return (
Score
{result.score.toFixed(3)}
Poisonings
0 ? "var(--red)" : "var(--ink)" }}> {result.poisonings}
Detections
0 ? "var(--green)" : "var(--ink3)" }}> {result.detections}
); }