Spaces:

ayussssssiiii
/

codecourt

Sleeping

App Files Files Community

codecourt / frontend /src /components /CodeCourtDashboard.jsx

ayussssssiiii

Initial HF Space snapshot

fcb838d about 1 month ago

raw

history blame contribute delete

20 kB

	import { useEffect, useState } from "react";
	import {
	Activity,
	BadgeCheck,
	BarChart3,
	ChevronRight,
	CircleAlert,
	ClipboardList,
	Database,
	FlaskConical,
	Play,
	Server,
	TerminalSquare,
	} from "lucide-react";

	const API_BASE = import.meta.env.VITE_API_BASE_URL ?? "http://127.0.0.1:7860";

	const ARCHETYPES = ["array", "graph", "dp"];
	const DIFFICULTIES = [1, 2, 3];

	const pipelineSteps = [
	"Create a seeded adversarial episode from the OpenEnv backend.",
	"Collect solver completions and send them to the Docker Oracle sandbox.",
	"Score correctness, hidden-test robustness, and anti-gaming signals.",
	"Apply GRPO reward shaping to improve the Solver policy.",
	"Track artifacts and compare baseline vs. trained behavior.",
	];

	function classNames(...values) {
	return values.filter(Boolean).join(" ");
	}

	function formatPercent(value) {
	if (value == null \|\| Number.isNaN(value)) return "n/a";
	return `${(value * 100).toFixed(1)}%`;
	}

	function formatReward(value) {
	if (value == null \|\| Number.isNaN(value)) return "n/a";
	return value.toFixed(2);
	}

	function Pill({ children, tone = "default" }) {
	const tones = {
	default: "border-slate-700 bg-slate-900/70 text-slate-300",
	success: "border-emerald-500/30 bg-emerald-500/10 text-emerald-300",
	danger: "border-rose-500/30 bg-rose-500/10 text-rose-300",
	warning: "border-amber-500/30 bg-amber-500/10 text-amber-300",
	accent: "border-cyan-500/30 bg-cyan-500/10 text-cyan-300",
	};

	return (
	<span
	className={classNames(
	"inline-flex items-center rounded-full border px-3 py-1 text-xs font-medium tracking-wide",
	tones[tone],
	)}
	>
	{children}
	</span>
	);
	}

	function SectionCard({ icon: Icon, title, subtitle, children, action }) {
	return (
	<section className="rounded-3xl border border-slate-800 bg-slate-950/80 shadow-[0_0_0_1px_rgba(15,23,42,0.4),0_30px_80px_rgba(2,8,23,0.55)] backdrop-blur">
	<div className="flex items-start justify-between gap-4 border-b border-slate-800 px-6 py-5">
	<div className="flex items-start gap-4">
	<div className="rounded-2xl border border-cyan-500/20 bg-cyan-500/10 p-3 text-cyan-300">
	<Icon className="h-5 w-5" />
	</div>
	<div>
	<h2 className="text-lg font-semibold text-slate-100">{title}</h2>
	{subtitle ? <p className="mt-1 text-sm text-slate-400">{subtitle}</p> : null}
	</div>
	</div>
	{action}
	</div>
	<div className="px-6 py-5">{children}</div>
	</section>
	);
	}

	function Header({ health }) {
	const healthy = health?.status === "ok";

	return (
	<header className="rounded-[2rem] border border-slate-800 bg-[radial-gradient(circle_at_top_left,_rgba(34,211,238,0.16),_transparent_28%),radial-gradient(circle_at_top_right,_rgba(244,114,182,0.14),_transparent_30%),rgba(2,6,23,0.92)] px-7 py-7 shadow-2xl">
	<div className="flex flex-col gap-5 lg:flex-row lg:items-start lg:justify-between">
	<div className="max-w-3xl">
	<div className="mb-4 flex flex-wrap items-center gap-3">
	<Pill tone="accent">CodeCourt OpenEnv Space</Pill>
	<Pill tone="default">v1.0.0</Pill>
	<Pill tone={healthy ? "success" : "danger"}>
	<Activity className="mr-2 h-3.5 w-3.5" />
	API {healthy ? "Healthy" : "Unavailable"}
	</Pill>
	</div>
	<h1 className="max-w-4xl text-4xl font-semibold tracking-tight text-white sm:text-5xl">
	Adversarial RL arena for code generation, hidden failures, and judge-ready proof.
	</h1>
	<p className="mt-4 max-w-3xl text-sm leading-7 text-slate-300 sm:text-base">
	CodeCourt visualizes the full OpenEnv loop: episode generation, secure code execution,
	GRPO reward shaping, and artifact tracking. This dashboard is designed to make failure,
	intervention, and improvement obvious in a single demo.
	</p>
	</div>

	<div className="grid min-w-[280px] gap-3 rounded-3xl border border-slate-800 bg-slate-900/70 p-4">
	<div className="flex items-center justify-between">
	<span className="text-xs uppercase tracking-[0.2em] text-slate-500">Story Arc</span>
	<ChevronRight className="h-4 w-4 text-slate-600" />
	</div>
	<div className="grid gap-3">
	<div className="rounded-2xl border border-rose-500/20 bg-rose-500/10 p-3">
	<p className="text-xs uppercase tracking-[0.2em] text-rose-300">Before</p>
	<p className="mt-1 text-sm text-slate-200">
	Brute-force solvers fail on hidden adversarial cases.
	</p>
	</div>
	<div className="rounded-2xl border border-amber-500/20 bg-amber-500/10 p-3">
	<p className="text-xs uppercase tracking-[0.2em] text-amber-300">Fix</p>
	<p className="mt-1 text-sm text-slate-200">
	Reward shaping, sandboxed Oracle, and seeded task variation.
	</p>
	</div>
	<div className="rounded-2xl border border-emerald-500/20 bg-emerald-500/10 p-3">
	<p className="text-xs uppercase tracking-[0.2em] text-emerald-300">After</p>
	<p className="mt-1 text-sm text-slate-200">
	Better pass rate, stronger reward, clearer training evidence.
	</p>
	</div>
	</div>
	</div>
	</div>
	</header>
	);
	}

	function EnvironmentConsole({ onCreateEpisode, loading }) {
	const [archetype, setArchetype] = useState("array");
	const [difficulty, setDifficulty] = useState(1);

	return (
	<SectionCard
	icon={Server}
	title="Environment Console"
	subtitle="Configure the OpenEnv episode and create a fresh adversarial task."
	>
	<div className="grid gap-4 xl:grid-cols-2">
	<label className="grid gap-2 text-sm text-slate-300">
	Archetype
	<select
	value={archetype}
	onChange={(event) => setArchetype(event.target.value)}
	className="rounded-2xl border border-slate-700 bg-slate-900 px-4 py-3 text-slate-100 outline-none transition focus:border-cyan-500"
	>
	{ARCHETYPES.map((value) => (
	<option key={value} value={value}>
	{value}
	</option>
	))}
	</select>
	</label>

	<label className="grid gap-2 text-sm text-slate-300">
	Difficulty
	<select
	value={difficulty}
	onChange={(event) => setDifficulty(Number(event.target.value))}
	className="rounded-2xl border border-slate-700 bg-slate-900 px-4 py-3 text-slate-100 outline-none transition focus:border-cyan-500"
	>
	{DIFFICULTIES.map((value) => (
	<option key={value} value={value}>
	{value}
	</option>
	))}
	</select>
	</label>
	</div>

	<div className="mt-5 flex flex-wrap items-center gap-3">
	<button
	type="button"
	onClick={() => onCreateEpisode({ archetype, difficulty })}
	disabled={loading}
	className="inline-flex items-center gap-2 rounded-2xl bg-cyan-400 px-5 py-3 text-sm font-semibold text-slate-950 transition hover:bg-cyan-300 disabled:cursor-not-allowed disabled:opacity-60"
	>
	<Play className="h-4 w-4" />
	{loading ? "Creating..." : "Create Episode"}
	</button>
	<Pill tone="default">OpenEnv Session</Pill>
	</div>
	</SectionCard>
	);
	}

	function BenchmarkSandbox({ onRun, loading, benchmark }) {
	return (
	<SectionCard
	icon={BarChart3}
	title="Benchmark Sandbox"
	subtitle="Run baseline and reference behavior across fresh episodes."
	action={
	<button
	type="button"
	onClick={onRun}
	disabled={loading}
	className="inline-flex items-center gap-2 rounded-2xl border border-slate-700 bg-slate-900 px-4 py-2 text-sm font-medium text-slate-200 transition hover:border-cyan-500 hover:text-white disabled:cursor-not-allowed disabled:opacity-60"
	>
	<FlaskConical className="h-4 w-4" />
	{loading ? "Running..." : "Run Benchmark"}
	</button>
	}
	>
	<div className="overflow-hidden rounded-2xl border border-slate-800">
	<table className="min-w-full divide-y divide-slate-800 text-sm">
	<thead className="bg-slate-900/80 text-left text-slate-400">
	<tr>
	<th className="px-4 py-3 font-medium">Episode</th>
	<th className="px-4 py-3 font-medium">Task</th>
	<th className="px-4 py-3 font-medium">Outcome</th>
	<th className="px-4 py-3 font-medium">Pass Rate</th>
	<th className="px-4 py-3 font-medium">Reward</th>
	</tr>
	</thead>
	<tbody className="divide-y divide-slate-800 bg-slate-950/40">
	{(benchmark?.episodes ?? []).length ? (
	benchmark.episodes.map((episode) => (
	<tr key={episode.episode} className="text-slate-200">
	<td className="px-4 py-3">{episode.episode + 1}</td>
	<td className="px-4 py-3">
	<div className="font-medium">{episode.archetype}</div>
	<div className="text-xs text-slate-500">task {episode.task_id}</div>
	</td>
	<td className="px-4 py-3">
	<Pill tone={episode.outcome === "solver_wins" ? "success" : "danger"}>
	{episode.outcome}
	</Pill>
	</td>
	<td className="px-4 py-3">{formatPercent(episode.solver_pass_rate)}</td>
	<td className="px-4 py-3">{formatReward(episode.solver_reward)}</td>
	</tr>
	))
	) : (
	<tr>
	<td colSpan={5} className="px-4 py-10 text-center text-slate-500">
	No benchmark run yet. Trigger the sandbox to compare baseline vs. reference behavior.
	</td>
	</tr>
	)}
	</tbody>
	</table>
	</div>

	{benchmark?.summary ? (
	<div className="mt-5 grid gap-3 sm:grid-cols-3">
	<div className="rounded-2xl border border-slate-800 bg-slate-900/70 p-4">
	<div className="text-xs uppercase tracking-[0.18em] text-slate-500">Avg Pass Rate</div>
	<div className="mt-2 text-2xl font-semibold text-white">
	{formatPercent(benchmark.summary.avg_solver_pass_rate)}
	</div>
	</div>
	<div className="rounded-2xl border border-slate-800 bg-slate-900/70 p-4">
	<div className="text-xs uppercase tracking-[0.18em] text-slate-500">Avg Reward</div>
	<div className="mt-2 text-2xl font-semibold text-white">
	{formatReward(benchmark.summary.avg_solver_reward)}
	</div>
	</div>
	<div className="rounded-2xl border border-slate-800 bg-slate-900/70 p-4">
	<div className="text-xs uppercase tracking-[0.18em] text-slate-500">Setter Win Rate</div>
	<div className="mt-2 text-2xl font-semibold text-white">
	{formatPercent(benchmark.summary.setter_win_rate)}
	</div>
	</div>
	</div>
	) : null}
	</SectionCard>
	);
	}

	function LiveExecutionArena({ session, lastRun }) {
	const resultTone =
	lastRun?.info?.outcome === "solver_wins"
	? "text-emerald-300"
	: lastRun?.info?.outcome
	? "text-rose-300"
	: "text-slate-500";

	return (
	<SectionCard
	icon={TerminalSquare}
	title="Live Execution Arena"
	subtitle="Terminal-style view of the current problem and latest solver result."
	>
	<div className="grid gap-5 xl:grid-cols-[1.15fr_0.85fr]">
	<div className="rounded-3xl border border-slate-800 bg-[#050816] p-4">
	<div className="mb-4 flex items-center gap-2">
	<span className="h-3 w-3 rounded-full bg-rose-400" />
	<span className="h-3 w-3 rounded-full bg-amber-400" />
	<span className="h-3 w-3 rounded-full bg-emerald-400" />
	<span className="ml-3 text-xs uppercase tracking-[0.25em] text-slate-500">
	Current Problem
	</span>
	</div>
	<pre className="min-h-[280px] overflow-auto whitespace-pre-wrap rounded-2xl border border-slate-800 bg-slate-950 p-4 text-sm leading-7 text-slate-200">
	{session?.problem?.description ??
	"Create an episode to view the generated problem statement and hidden-test-ready task."}
	</pre>
	</div>

	<div className="rounded-3xl border border-slate-800 bg-[#050816] p-4">
	<div className="mb-4 flex items-center justify-between">
	<span className="text-xs uppercase tracking-[0.25em] text-slate-500">Run Results</span>
	<span className={classNames("text-sm font-semibold", resultTone)}>
	{lastRun?.info?.outcome ?? "No execution yet"}
	</span>
	</div>
	<div className="grid gap-3">
	<div className="rounded-2xl border border-slate-800 bg-slate-950 p-4">
	<div className="text-xs uppercase tracking-[0.18em] text-slate-500">Pass Rate</div>
	<div className="mt-2 text-2xl font-semibold text-white">
	{formatPercent(lastRun?.info?.solver_pass_rate)}
	</div>
	</div>
	<div className="rounded-2xl border border-slate-800 bg-slate-950 p-4">
	<div className="text-xs uppercase tracking-[0.18em] text-slate-500">Solver Reward</div>
	<div className="mt-2 text-2xl font-semibold text-white">
	{formatReward(lastRun?.solver_reward_info?.reward)}
	</div>
	</div>
	<div className="rounded-2xl border border-slate-800 bg-slate-950 p-4">
	<div className="text-xs uppercase tracking-[0.18em] text-slate-500">Status</div>
	<div className="mt-2 flex flex-wrap gap-2">
	<Pill tone={lastRun?.info?.setter_valid ? "success" : "danger"}>
	setter {lastRun?.info?.setter_valid ? "valid" : "invalid"}
	</Pill>
	<Pill tone="default">
	hidden {formatPercent(lastRun?.info?.solver_hidden_pass_rate)}
	</Pill>
	</div>
	</div>
	</div>
	</div>
	</div>
	</SectionCard>
	);
	}

	function TrainingPipelineStatus() {
	return (
	<SectionCard
	icon={ClipboardList}
	title="Training Pipeline"
	subtitle="The 5-step GRPO loop visualized for demo clarity."
	>
	<div className="grid gap-3">
	{pipelineSteps.map((step, index) => (
	<div
	key={step}
	className="flex items-start gap-4 rounded-2xl border border-slate-800 bg-slate-900/60 p-4"
	>
	<div className="flex h-9 w-9 shrink-0 items-center justify-center rounded-2xl bg-cyan-400 font-semibold text-slate-950">
	{index + 1}
	</div>
	<div>
	<p className="text-sm font-medium text-slate-100">{step}</p>
	</div>
	</div>
	))}
	</div>
	</SectionCard>
	);
	}

	function ArtifactTracker({ artifacts }) {
	const statusItems = [
	["Baseline", artifacts?.baseline_available],
	["Manifest", artifacts?.training_manifest_available],
	["GRPO Logs", artifacts?.training_log_available],
	["Plots", artifacts?.plots_available],
	];

	return (
	<SectionCard
	icon={Database}
	title="Artifact Tracker"
	subtitle="Judge-facing evidence that the training loop is real and measurable."
	>
	<div className="flex flex-wrap gap-3">
	{statusItems.map(([label, present]) => (
	<Pill key={label} tone={present ? "success" : "danger"}>
	{present ? <BadgeCheck className="mr-2 h-3.5 w-3.5" /> : <CircleAlert className="mr-2 h-3.5 w-3.5" />}
	{label} {present ? "present" : "missing"}
	</Pill>
	))}
	</div>

	<div className="mt-5 grid gap-3 md:grid-cols-2">
	<div className="rounded-2xl border border-slate-800 bg-slate-900/70 p-4">
	<div className="text-xs uppercase tracking-[0.18em] text-slate-500">Baseline Pass Rate</div>
	<div className="mt-2 text-2xl font-semibold text-white">
	{formatPercent(artifacts?.baseline_summary?.avg_solver_pass_rate)}
	</div>
	</div>
	<div className="rounded-2xl border border-slate-800 bg-slate-900/70 p-4">
	<div className="text-xs uppercase tracking-[0.18em] text-slate-500">Latest Reward</div>
	<div className="mt-2 text-2xl font-semibold text-white">
	{formatReward(artifacts?.latest_training_metrics?.reward)}
	</div>
	</div>
	</div>
	</SectionCard>
	);
	}

	export default function CodeCourtDashboard() {
	const [health, setHealth] = useState(null);
	const [artifacts, setArtifacts] = useState(null);
	const [session, setSession] = useState(null);
	const [lastRun, setLastRun] = useState(null);
	const [benchmark, setBenchmark] = useState(null);
	const [creatingEpisode, setCreatingEpisode] = useState(false);
	const [runningBenchmark, setRunningBenchmark] = useState(false);

	useEffect(() => {
	async function bootstrap() {
	try {
	const [healthResponse, artifactsResponse] = await Promise.all([
	fetch(`${API_BASE}/api/health`).then((response) => response.json()),
	fetch(`${API_BASE}/api/artifacts`).then((response) => response.json()),
	]);
	setHealth(healthResponse);
	setArtifacts(artifactsResponse);
	} catch (error) {
	setHealth({ status: "error", message: error.message });
	}
	}

	bootstrap();
	}, []);

	async function handleCreateEpisode({ archetype, difficulty }) {
	setCreatingEpisode(true);
	try {
	const response = await fetch(`${API_BASE}/api/session`, {
	method: "POST",
	headers: { "Content-Type": "application/json" },
	body: JSON.stringify({
	archetype,
	difficulty,
	seed: 42,
	}),
	});
	const data = await response.json();
	setSession(data.state);
	setLastRun(null);
	} finally {
	setCreatingEpisode(false);
	}
	}

	async function handleRunBenchmark() {
	setRunningBenchmark(true);
	try {
	const response = await fetch(`${API_BASE}/api/benchmark`, {
	method: "POST",
	headers: { "Content-Type": "application/json" },
	body: JSON.stringify({
	episodes: 6,
	solver_mode: "brute_force",
	seed: 42,
	}),
	});
	const data = await response.json();
	setBenchmark(data);
	} finally {
	setRunningBenchmark(false);
	}
	}

	return (
	<div className="min-h-screen bg-[radial-gradient(circle_at_top_left,_rgba(34,211,238,0.12),_transparent_25%),radial-gradient(circle_at_top_right,_rgba(168,85,247,0.14),_transparent_30%),#020617] px-4 py-6 text-slate-100 sm:px-6 lg:px-8">
	<div className="mx-auto flex max-w-7xl flex-col gap-6">
	<Header health={health} />

	<div className="grid gap-6 xl:grid-cols-[1.05fr_0.95fr]">
	<div className="grid gap-6">
	<EnvironmentConsole onCreateEpisode={handleCreateEpisode} loading={creatingEpisode} />
	<BenchmarkSandbox onRun={handleRunBenchmark} loading={runningBenchmark} benchmark={benchmark} />
	</div>

	<div className="grid gap-6">
	<LiveExecutionArena session={session} lastRun={lastRun} />
	<TrainingPipelineStatus />
	<ArtifactTracker artifacts={artifacts} />
	</div>
	</div>
	</div>
	</div>
	);
	}