File size: 19,999 Bytes
fcb838d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
import { useEffect, useState } from "react";
import {
  Activity,
  BadgeCheck,
  BarChart3,
  ChevronRight,
  CircleAlert,
  ClipboardList,
  Database,
  FlaskConical,
  Play,
  Server,
  TerminalSquare,
} from "lucide-react";

// Base URL prefixed to every backend request in this file. Read from
// `import.meta.env.VITE_API_BASE_URL`; when that is null/undefined the
// local address below is used instead.
const API_BASE = import.meta.env.VITE_API_BASE_URL ?? "http://127.0.0.1:7860";

// Values offered by the "Archetype" select in EnvironmentConsole.
const ARCHETYPES = ["array", "graph", "dp"];
// Values offered by the "Difficulty" select in EnvironmentConsole.
const DIFFICULTIES = [1, 2, 3];

// Step descriptions rendered, in order and numbered from 1, by
// TrainingPipelineStatus. Each string is also used as the React key there,
// so entries must stay unique.
const pipelineSteps = [
  "Create a seeded adversarial episode from the OpenEnv backend.",
  "Collect solver completions and send them to the Docker Oracle sandbox.",
  "Score correctness, hidden-test robustness, and anti-gaming signals.",
  "Apply GRPO reward shaping to improve the Solver policy.",
  "Track artifacts and compare baseline vs. trained behavior.",
];

/**
 * Builds a class attribute value from the given fragments.
 *
 * Falsy fragments (false, null, undefined, "", 0, NaN) are skipped, which lets
 * callers pass conditional or possibly-missing entries directly.
 *
 * @param {...*} values - Class fragments; only truthy ones are kept.
 * @returns {string} The kept fragments joined by a single space.
 */
function classNames(...values) {
  const kept = [];
  for (const fragment of values) {
    if (fragment) {
      kept.push(fragment);
    }
  }
  return kept.join(" ");
}

/**
 * Formats a ratio (0.5 means 50%) as a percentage with one decimal place.
 *
 * Anything that cannot be shown as a finite percentage (null, undefined,
 * blank strings, NaN, Infinity, non-numeric strings or objects) yields the
 * "n/a" placeholder instead of leaking "NaN%" or "Infinity%" into the UI.
 * Numeric strings such as "0.25" are accepted and converted.
 *
 * @param {*} value - Ratio to format.
 * @returns {string} For example "50.0%", or "n/a" when no finite value is available.
 */
function formatPercent(value) {
  if (value == null) return "n/a";
  // Number("") is 0, which would be displayed as a real measurement.
  if (typeof value === "string" && value.trim() === "") return "n/a";

  const percent = Number(value) * 100;
  if (!Number.isFinite(percent)) return "n/a";

  return `${percent.toFixed(1)}%`;
}

/**
 * Formats a reward value with two decimal places.
 *
 * Anything that cannot be shown as a finite number (null, undefined, blank
 * strings, NaN, Infinity, non-numeric strings or objects) yields the "n/a"
 * placeholder. Numeric strings such as "1.5" are converted first, so a
 * string-typed payload field no longer throws on `.toFixed` during render.
 *
 * @param {*} value - Reward to format.
 * @returns {string} For example "1.50", or "n/a" when no finite value is available.
 */
function formatReward(value) {
  if (value == null) return "n/a";
  // Number("") is 0, which would be displayed as a real measurement.
  if (typeof value === "string" && value.trim() === "") return "n/a";

  const reward = Number(value);
  if (!Number.isFinite(reward)) return "n/a";

  return reward.toFixed(2);
}

function Pill({ children, tone = "default" }) {
  const tones = {
    default: "border-slate-700 bg-slate-900/70 text-slate-300",
    success: "border-emerald-500/30 bg-emerald-500/10 text-emerald-300",
    danger: "border-rose-500/30 bg-rose-500/10 text-rose-300",
    warning: "border-amber-500/30 bg-amber-500/10 text-amber-300",
    accent: "border-cyan-500/30 bg-cyan-500/10 text-cyan-300",
  };

  return (
    <span
      className={classNames(
        "inline-flex items-center rounded-full border px-3 py-1 text-xs font-medium tracking-wide",
        tones[tone],
      )}
    >
      {children}
    </span>
  );
}

function SectionCard({ icon: Icon, title, subtitle, children, action }) {
  return (
    <section className="rounded-3xl border border-slate-800 bg-slate-950/80 shadow-[0_0_0_1px_rgba(15,23,42,0.4),0_30px_80px_rgba(2,8,23,0.55)] backdrop-blur">
      <div className="flex items-start justify-between gap-4 border-b border-slate-800 px-6 py-5">
        <div className="flex items-start gap-4">
          <div className="rounded-2xl border border-cyan-500/20 bg-cyan-500/10 p-3 text-cyan-300">
            <Icon className="h-5 w-5" />
          </div>
          <div>
            <h2 className="text-lg font-semibold text-slate-100">{title}</h2>
            {subtitle ? <p className="mt-1 text-sm text-slate-400">{subtitle}</p> : null}
          </div>
        </div>
        {action}
      </div>
      <div className="px-6 py-5">{children}</div>
    </section>
  );
}

function Header({ health }) {
  const healthy = health?.status === "ok";

  return (
    <header className="rounded-[2rem] border border-slate-800 bg-[radial-gradient(circle_at_top_left,_rgba(34,211,238,0.16),_transparent_28%),radial-gradient(circle_at_top_right,_rgba(244,114,182,0.14),_transparent_30%),rgba(2,6,23,0.92)] px-7 py-7 shadow-2xl">
      <div className="flex flex-col gap-5 lg:flex-row lg:items-start lg:justify-between">
        <div className="max-w-3xl">
          <div className="mb-4 flex flex-wrap items-center gap-3">
            <Pill tone="accent">CodeCourt OpenEnv Space</Pill>
            <Pill tone="default">v1.0.0</Pill>
            <Pill tone={healthy ? "success" : "danger"}>
              <Activity className="mr-2 h-3.5 w-3.5" />
              API {healthy ? "Healthy" : "Unavailable"}
            </Pill>
          </div>
          <h1 className="max-w-4xl text-4xl font-semibold tracking-tight text-white sm:text-5xl">
            Adversarial RL arena for code generation, hidden failures, and judge-ready proof.
          </h1>
          <p className="mt-4 max-w-3xl text-sm leading-7 text-slate-300 sm:text-base">
            CodeCourt visualizes the full OpenEnv loop: episode generation, secure code execution,
            GRPO reward shaping, and artifact tracking. This dashboard is designed to make failure,
            intervention, and improvement obvious in a single demo.
          </p>
        </div>

        <div className="grid min-w-[280px] gap-3 rounded-3xl border border-slate-800 bg-slate-900/70 p-4">
          <div className="flex items-center justify-between">
            <span className="text-xs uppercase tracking-[0.2em] text-slate-500">Story Arc</span>
            <ChevronRight className="h-4 w-4 text-slate-600" />
          </div>
          <div className="grid gap-3">
            <div className="rounded-2xl border border-rose-500/20 bg-rose-500/10 p-3">
              <p className="text-xs uppercase tracking-[0.2em] text-rose-300">Before</p>
              <p className="mt-1 text-sm text-slate-200">
                Brute-force solvers fail on hidden adversarial cases.
              </p>
            </div>
            <div className="rounded-2xl border border-amber-500/20 bg-amber-500/10 p-3">
              <p className="text-xs uppercase tracking-[0.2em] text-amber-300">Fix</p>
              <p className="mt-1 text-sm text-slate-200">
                Reward shaping, sandboxed Oracle, and seeded task variation.
              </p>
            </div>
            <div className="rounded-2xl border border-emerald-500/20 bg-emerald-500/10 p-3">
              <p className="text-xs uppercase tracking-[0.2em] text-emerald-300">After</p>
              <p className="mt-1 text-sm text-slate-200">
                Better pass rate, stronger reward, clearer training evidence.
              </p>
            </div>
          </div>
        </div>
      </div>
    </header>
  );
}

function EnvironmentConsole({ onCreateEpisode, loading }) {
  const [archetype, setArchetype] = useState("array");
  const [difficulty, setDifficulty] = useState(1);

  return (
    <SectionCard
      icon={Server}
      title="Environment Console"
      subtitle="Configure the OpenEnv episode and create a fresh adversarial task."
    >
      <div className="grid gap-4 xl:grid-cols-2">
        <label className="grid gap-2 text-sm text-slate-300">
          Archetype
          <select
            value={archetype}
            onChange={(event) => setArchetype(event.target.value)}
            className="rounded-2xl border border-slate-700 bg-slate-900 px-4 py-3 text-slate-100 outline-none transition focus:border-cyan-500"
          >
            {ARCHETYPES.map((value) => (
              <option key={value} value={value}>
                {value}
              </option>
            ))}
          </select>
        </label>

        <label className="grid gap-2 text-sm text-slate-300">
          Difficulty
          <select
            value={difficulty}
            onChange={(event) => setDifficulty(Number(event.target.value))}
            className="rounded-2xl border border-slate-700 bg-slate-900 px-4 py-3 text-slate-100 outline-none transition focus:border-cyan-500"
          >
            {DIFFICULTIES.map((value) => (
              <option key={value} value={value}>
                {value}
              </option>
            ))}
          </select>
        </label>
      </div>

      <div className="mt-5 flex flex-wrap items-center gap-3">
        <button
          type="button"
          onClick={() => onCreateEpisode({ archetype, difficulty })}
          disabled={loading}
          className="inline-flex items-center gap-2 rounded-2xl bg-cyan-400 px-5 py-3 text-sm font-semibold text-slate-950 transition hover:bg-cyan-300 disabled:cursor-not-allowed disabled:opacity-60"
        >
          <Play className="h-4 w-4" />
          {loading ? "Creating..." : "Create Episode"}
        </button>
        <Pill tone="default">OpenEnv Session</Pill>
      </div>
    </SectionCard>
  );
}

function BenchmarkSandbox({ onRun, loading, benchmark }) {
  return (
    <SectionCard
      icon={BarChart3}
      title="Benchmark Sandbox"
      subtitle="Run baseline and reference behavior across fresh episodes."
      action={
        <button
          type="button"
          onClick={onRun}
          disabled={loading}
          className="inline-flex items-center gap-2 rounded-2xl border border-slate-700 bg-slate-900 px-4 py-2 text-sm font-medium text-slate-200 transition hover:border-cyan-500 hover:text-white disabled:cursor-not-allowed disabled:opacity-60"
        >
          <FlaskConical className="h-4 w-4" />
          {loading ? "Running..." : "Run Benchmark"}
        </button>
      }
    >
      <div className="overflow-hidden rounded-2xl border border-slate-800">
        <table className="min-w-full divide-y divide-slate-800 text-sm">
          <thead className="bg-slate-900/80 text-left text-slate-400">
            <tr>
              <th className="px-4 py-3 font-medium">Episode</th>
              <th className="px-4 py-3 font-medium">Task</th>
              <th className="px-4 py-3 font-medium">Outcome</th>
              <th className="px-4 py-3 font-medium">Pass Rate</th>
              <th className="px-4 py-3 font-medium">Reward</th>
            </tr>
          </thead>
          <tbody className="divide-y divide-slate-800 bg-slate-950/40">
            {(benchmark?.episodes ?? []).length ? (
              benchmark.episodes.map((episode) => (
                <tr key={episode.episode} className="text-slate-200">
                  <td className="px-4 py-3">{episode.episode + 1}</td>
                  <td className="px-4 py-3">
                    <div className="font-medium">{episode.archetype}</div>
                    <div className="text-xs text-slate-500">task {episode.task_id}</div>
                  </td>
                  <td className="px-4 py-3">
                    <Pill tone={episode.outcome === "solver_wins" ? "success" : "danger"}>
                      {episode.outcome}
                    </Pill>
                  </td>
                  <td className="px-4 py-3">{formatPercent(episode.solver_pass_rate)}</td>
                  <td className="px-4 py-3">{formatReward(episode.solver_reward)}</td>
                </tr>
              ))
            ) : (
              <tr>
                <td colSpan={5} className="px-4 py-10 text-center text-slate-500">
                  No benchmark run yet. Trigger the sandbox to compare baseline vs. reference behavior.
                </td>
              </tr>
            )}
          </tbody>
        </table>
      </div>

      {benchmark?.summary ? (
        <div className="mt-5 grid gap-3 sm:grid-cols-3">
          <div className="rounded-2xl border border-slate-800 bg-slate-900/70 p-4">
            <div className="text-xs uppercase tracking-[0.18em] text-slate-500">Avg Pass Rate</div>
            <div className="mt-2 text-2xl font-semibold text-white">
              {formatPercent(benchmark.summary.avg_solver_pass_rate)}
            </div>
          </div>
          <div className="rounded-2xl border border-slate-800 bg-slate-900/70 p-4">
            <div className="text-xs uppercase tracking-[0.18em] text-slate-500">Avg Reward</div>
            <div className="mt-2 text-2xl font-semibold text-white">
              {formatReward(benchmark.summary.avg_solver_reward)}
            </div>
          </div>
          <div className="rounded-2xl border border-slate-800 bg-slate-900/70 p-4">
            <div className="text-xs uppercase tracking-[0.18em] text-slate-500">Setter Win Rate</div>
            <div className="mt-2 text-2xl font-semibold text-white">
              {formatPercent(benchmark.summary.setter_win_rate)}
            </div>
          </div>
        </div>
      ) : null}
    </SectionCard>
  );
}

function LiveExecutionArena({ session, lastRun }) {
  const resultTone =
    lastRun?.info?.outcome === "solver_wins"
      ? "text-emerald-300"
      : lastRun?.info?.outcome
        ? "text-rose-300"
        : "text-slate-500";

  return (
    <SectionCard
      icon={TerminalSquare}
      title="Live Execution Arena"
      subtitle="Terminal-style view of the current problem and latest solver result."
    >
      <div className="grid gap-5 xl:grid-cols-[1.15fr_0.85fr]">
        <div className="rounded-3xl border border-slate-800 bg-[#050816] p-4">
          <div className="mb-4 flex items-center gap-2">
            <span className="h-3 w-3 rounded-full bg-rose-400" />
            <span className="h-3 w-3 rounded-full bg-amber-400" />
            <span className="h-3 w-3 rounded-full bg-emerald-400" />
            <span className="ml-3 text-xs uppercase tracking-[0.25em] text-slate-500">
              Current Problem
            </span>
          </div>
          <pre className="min-h-[280px] overflow-auto whitespace-pre-wrap rounded-2xl border border-slate-800 bg-slate-950 p-4 text-sm leading-7 text-slate-200">
            {session?.problem?.description ??
              "Create an episode to view the generated problem statement and hidden-test-ready task."}
          </pre>
        </div>

        <div className="rounded-3xl border border-slate-800 bg-[#050816] p-4">
          <div className="mb-4 flex items-center justify-between">
            <span className="text-xs uppercase tracking-[0.25em] text-slate-500">Run Results</span>
            <span className={classNames("text-sm font-semibold", resultTone)}>
              {lastRun?.info?.outcome ?? "No execution yet"}
            </span>
          </div>
          <div className="grid gap-3">
            <div className="rounded-2xl border border-slate-800 bg-slate-950 p-4">
              <div className="text-xs uppercase tracking-[0.18em] text-slate-500">Pass Rate</div>
              <div className="mt-2 text-2xl font-semibold text-white">
                {formatPercent(lastRun?.info?.solver_pass_rate)}
              </div>
            </div>
            <div className="rounded-2xl border border-slate-800 bg-slate-950 p-4">
              <div className="text-xs uppercase tracking-[0.18em] text-slate-500">Solver Reward</div>
              <div className="mt-2 text-2xl font-semibold text-white">
                {formatReward(lastRun?.solver_reward_info?.reward)}
              </div>
            </div>
            <div className="rounded-2xl border border-slate-800 bg-slate-950 p-4">
              <div className="text-xs uppercase tracking-[0.18em] text-slate-500">Status</div>
              <div className="mt-2 flex flex-wrap gap-2">
                <Pill tone={lastRun?.info?.setter_valid ? "success" : "danger"}>
                  setter {lastRun?.info?.setter_valid ? "valid" : "invalid"}
                </Pill>
                <Pill tone="default">
                  hidden {formatPercent(lastRun?.info?.solver_hidden_pass_rate)}
                </Pill>
              </div>
            </div>
          </div>
        </div>
      </div>
    </SectionCard>
  );
}

function TrainingPipelineStatus() {
  return (
    <SectionCard
      icon={ClipboardList}
      title="Training Pipeline"
      subtitle="The 5-step GRPO loop visualized for demo clarity."
    >
      <div className="grid gap-3">
        {pipelineSteps.map((step, index) => (
          <div
            key={step}
            className="flex items-start gap-4 rounded-2xl border border-slate-800 bg-slate-900/60 p-4"
          >
            <div className="flex h-9 w-9 shrink-0 items-center justify-center rounded-2xl bg-cyan-400 font-semibold text-slate-950">
              {index + 1}
            </div>
            <div>
              <p className="text-sm font-medium text-slate-100">{step}</p>
            </div>
          </div>
        ))}
      </div>
    </SectionCard>
  );
}

function ArtifactTracker({ artifacts }) {
  const statusItems = [
    ["Baseline", artifacts?.baseline_available],
    ["Manifest", artifacts?.training_manifest_available],
    ["GRPO Logs", artifacts?.training_log_available],
    ["Plots", artifacts?.plots_available],
  ];

  return (
    <SectionCard
      icon={Database}
      title="Artifact Tracker"
      subtitle="Judge-facing evidence that the training loop is real and measurable."
    >
      <div className="flex flex-wrap gap-3">
        {statusItems.map(([label, present]) => (
          <Pill key={label} tone={present ? "success" : "danger"}>
            {present ? <BadgeCheck className="mr-2 h-3.5 w-3.5" /> : <CircleAlert className="mr-2 h-3.5 w-3.5" />}
            {label} {present ? "present" : "missing"}
          </Pill>
        ))}
      </div>

      <div className="mt-5 grid gap-3 md:grid-cols-2">
        <div className="rounded-2xl border border-slate-800 bg-slate-900/70 p-4">
          <div className="text-xs uppercase tracking-[0.18em] text-slate-500">Baseline Pass Rate</div>
          <div className="mt-2 text-2xl font-semibold text-white">
            {formatPercent(artifacts?.baseline_summary?.avg_solver_pass_rate)}
          </div>
        </div>
        <div className="rounded-2xl border border-slate-800 bg-slate-900/70 p-4">
          <div className="text-xs uppercase tracking-[0.18em] text-slate-500">Latest Reward</div>
          <div className="mt-2 text-2xl font-semibold text-white">
            {formatReward(artifacts?.latest_training_metrics?.reward)}
          </div>
        </div>
      </div>
    </SectionCard>
  );
}

export default function CodeCourtDashboard() {
  const [health, setHealth] = useState(null);
  const [artifacts, setArtifacts] = useState(null);
  const [session, setSession] = useState(null);
  const [lastRun, setLastRun] = useState(null);
  const [benchmark, setBenchmark] = useState(null);
  const [creatingEpisode, setCreatingEpisode] = useState(false);
  const [runningBenchmark, setRunningBenchmark] = useState(false);

  useEffect(() => {
    async function bootstrap() {
      try {
        const [healthResponse, artifactsResponse] = await Promise.all([
          fetch(`${API_BASE}/api/health`).then((response) => response.json()),
          fetch(`${API_BASE}/api/artifacts`).then((response) => response.json()),
        ]);
        setHealth(healthResponse);
        setArtifacts(artifactsResponse);
      } catch (error) {
        setHealth({ status: "error", message: error.message });
      }
    }

    bootstrap();
  }, []);

  async function handleCreateEpisode({ archetype, difficulty }) {
    setCreatingEpisode(true);
    try {
      const response = await fetch(`${API_BASE}/api/session`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          archetype,
          difficulty,
          seed: 42,
        }),
      });
      const data = await response.json();
      setSession(data.state);
      setLastRun(null);
    } finally {
      setCreatingEpisode(false);
    }
  }

  async function handleRunBenchmark() {
    setRunningBenchmark(true);
    try {
      const response = await fetch(`${API_BASE}/api/benchmark`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          episodes: 6,
          solver_mode: "brute_force",
          seed: 42,
        }),
      });
      const data = await response.json();
      setBenchmark(data);
    } finally {
      setRunningBenchmark(false);
    }
  }

  return (
    <div className="min-h-screen bg-[radial-gradient(circle_at_top_left,_rgba(34,211,238,0.12),_transparent_25%),radial-gradient(circle_at_top_right,_rgba(168,85,247,0.14),_transparent_30%),#020617] px-4 py-6 text-slate-100 sm:px-6 lg:px-8">
      <div className="mx-auto flex max-w-7xl flex-col gap-6">
        <Header health={health} />

        <div className="grid gap-6 xl:grid-cols-[1.05fr_0.95fr]">
          <div className="grid gap-6">
            <EnvironmentConsole onCreateEpisode={handleCreateEpisode} loading={creatingEpisode} />
            <BenchmarkSandbox onRun={handleRunBenchmark} loading={runningBenchmark} benchmark={benchmark} />
          </div>

          <div className="grid gap-6">
            <LiveExecutionArena session={session} lastRun={lastRun} />
            <TrainingPipelineStatus />
            <ArtifactTracker artifacts={artifacts} />
          </div>
        </div>
      </div>
    </div>
  );
}