"""FastAPI + Gradio app that exposes both UI and validator-friendly API endpoints."""
from __future__ import annotations
import json
import os
from collections import Counter
from pathlib import Path
from threading import Lock
from typing import Any, Dict
from fastapi import FastAPI
from fastapi.responses import RedirectResponse
import sys
# Directory that contains this file. It is pushed to the front of ``sys.path``
# (once) so the ``environment`` package imported just below can be resolved.
PROJECT_ROOT = Path(__file__).resolve().parent
if not any(entry == str(PROJECT_ROOT) for entry in sys.path):
    sys.path.insert(0, str(PROJECT_ROOT))
from environment.env import CodeReviewEnv
from environment.tasks import TaskDefinitions
# The Gradio UI is opt-in: it is enabled only when the ENABLE_GRADIO_UI
# environment variable is one of "1", "true" or "yes" (case-insensitive,
# surrounding whitespace ignored) AND the gradio package imports cleanly.
# ``gr`` stays ``None`` whenever the UI is disabled.
gr = None
ENABLE_GRADIO_UI = os.getenv("ENABLE_GRADIO_UI", "").strip().lower() in {"1", "true", "yes"}
if ENABLE_GRADIO_UI:
    try:
        import gradio as gr
    except Exception:
        # Best effort: any failure while importing gradio turns the UI off
        # instead of preventing the API from starting. ``gr`` is still None
        # here because the import never bound the name.
        ENABLE_GRADIO_UI = False
# FastAPI application object; the route decorators below register handlers on it.
app = FastAPI(title="code-review-agent-env")
# Single module-level environment instance shared by every endpoint and UI callback.
_env = CodeReviewEnv()
# Non-reentrant lock that handlers hold around calls into ``_env``.
_lock = Lock()
@app.get("/health")
def health() -> Dict[str, str]:
    """Liveness probe: always answers with ``{"status": "healthy"}``."""
    payload: Dict[str, str] = {"status": "healthy"}
    return payload
@app.api_route("/reset", methods=["GET", "POST"])
def reset(payload: Dict[str, Any] | None = None, task_id: str | None = None) -> Dict[str, Any]:
    """Reset the shared environment and return the resulting observation.

    Args:
        payload: Optional mapping; a truthy ``"task_id"`` entry takes precedence.
        task_id: Fallback task id used when ``payload`` supplies none.

    Returns:
        ``{"observation": ...}`` wrapping the value returned by ``_env.reset``.
    """
    requested = (payload or {}).get("task_id")
    chosen_task_id = requested if requested else task_id
    with _lock:
        first_observation = _env.reset(task_id=chosen_task_id)
    return {"observation": first_observation}
@app.post("/step")
def step(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Apply one action to the shared environment.

    Args:
        payload: Mapping whose ``"action"`` entry must be a dict.

    Returns:
        A dict with ``observation``, ``reward``, ``done`` and ``info`` taken
        from the 4-tuple returned by ``_env.step``; or a dict with a single
        ``error`` message when ``"action"`` is missing or not a dict (the
        environment is not touched in that case).
    """
    action = payload.get("action")
    if isinstance(action, dict):
        with _lock:
            outcome = _env.step(action)
        observation, reward, done, info = outcome
        return {
            "observation": observation,
            "reward": reward,
            "done": done,
            "info": info,
        }
    return {"error": "Request body must include an 'action' object"}
@app.get("/state")
def state() -> Dict[str, Any]:
    """Return the value of ``_env.state()``, read while holding the lock."""
    with _lock:
        snapshot = _env.state()
    return snapshot
@app.get("/tasks")
def tasks() -> Dict[str, Any]:
    """List every task from ``TaskDefinitions.get_all_tasks()``.

    Returns:
        ``{"count": <number of tasks>, "tasks": [...]}`` where each entry
        carries only the id, name, difficulty, description and language.
    """
    exposed_fields = ("task_id", "task_name", "difficulty", "description", "language")
    catalogue = TaskDefinitions.get_all_tasks()
    summaries = [
        {field: entry[field] for field in exposed_fields}
        for entry in catalogue
    ]
    return {"count": len(catalogue), "tasks": summaries}
@app.get("/score")
def score() -> Dict[str, Any]:
    """Report the current task score together with basic episode progress.

    The score and the state are read under one lock acquisition. Because
    this function takes ``_lock`` itself, callers must not already hold it.

    Returns:
        Dict with ``task_score``, ``current_step`` (default 0),
        ``is_complete`` (default False) and ``task_id`` (None when the state
        has no ``task_metadata``).
    """
    with _lock:
        task_score = _env.get_task_score()
        snapshot = _env.state()
    metadata = snapshot.get("task_metadata") or {}
    return {
        "task_score": task_score,
        "current_step": snapshot.get("current_step", 0),
        "is_complete": snapshot.get("is_complete", False),
        "task_id": metadata.get("task_id"),
    }
@app.get("/diagnostics")
def diagnostics() -> Dict[str, Any]:
    """Return the task score, the environment summary and validation checks.

    ``_env.summary()`` is only consulted when ``_env.state()`` is truthy;
    otherwise the ``diagnostics`` entry is an empty dict.
    """
    with _lock:
        snapshot = _env.state()
        if snapshot:
            summary_data = _env.summary()
        else:
            summary_data = {}
        task_score = _env.get_task_score()
    metadata = snapshot.get("task_metadata") or {}
    return {
        "task_score": task_score,
        "diagnostics": summary_data,
        "validation": _validation_checks(),
        "task_id": metadata.get("task_id"),
    }
def _ui_reset(task_id: str) -> str:
    """UI callback: reset the environment and return the observation as JSON.

    An empty ``task_id`` is forwarded to ``_env.reset`` as ``None``.
    """
    selected = task_id if task_id else None
    with _lock:
        first_observation = _env.reset(task_id=selected)
    return json.dumps({"observation": first_observation}, indent=2)
def _ui_step(action_json: str) -> str:
    """UI callback: parse an action from JSON text and step the environment.

    Args:
        action_json: Text expected to decode to a JSON object.

    Returns:
        Pretty-printed JSON with ``observation``, ``reward``, ``done`` and
        ``info``; or ``{"error": "Invalid action JSON: ..."}`` when the text
        cannot be decoded or does not decode to an object (the environment
        is left untouched in that case).
    """

    def _rejected(reason: object) -> str:
        # Every parse/shape failure is reported through the same envelope.
        return json.dumps({"error": f"Invalid action JSON: {reason}"}, indent=2)

    try:
        action = json.loads(action_json)
    except Exception as parse_error:
        return _rejected(parse_error)
    if not isinstance(action, dict):
        return _rejected("Action must be a JSON object")

    with _lock:
        observation, reward, done, info = _env.step(action)
    result = {
        "observation": observation,
        "reward": reward,
        "done": done,
        "info": info,
    }
    return json.dumps(result, indent=2)
def _starter_action_json(task_id: str) -> str:
    """Build the canned starter action for ``task_id`` as pretty-printed JSON.

    The action is an ``add_comment`` with one comment and one suggestion,
    both aimed at line 1, and no final decision.
    """
    comment = {
        "line_number": 1,
        "content": f"Starter review for {task_id}: inspect this line for correctness.",
        "is_issue": True,
        "severity": "medium",
    }
    suggestion = {
        "original_line": 1,
        "suggested_code": "# example improvement",
        "explanation": "Starter suggestion for new users.",
    }
    return json.dumps(
        {
            "action_type": "add_comment",
            "comments": [comment],
            "suggestions": [suggestion],
            "final_decision": None,
        },
        indent=2,
    )
def _ui_run_starter_step(task_id: str) -> str:
    """UI callback: reset the selected task, then run the starter action.

    An empty ``task_id`` resets with ``None`` and labels the starter action
    with ``"starter_task"``.

    Returns:
        Pretty-printed JSON holding the starter action, the step result
        (``observation``, ``reward``, ``done``, ``info``) and a short note.
    """
    starter_json = _starter_action_json(task_id or "starter_task")
    with _lock:
        _env.reset(task_id=task_id or None)
        # The environment receives its own decoded copy of the action; the
        # copy echoed back below is decoded separately.
        observation, reward, done, info = _env.step(json.loads(starter_json))
    response = {
        "starter_action": json.loads(starter_json),
        "observation": observation,
        "reward": reward,
        "done": done,
        "info": info,
        "note": "This button resets the selected task first, then executes a safe starter action.",
    }
    return json.dumps(response, indent=2)
def _ui_state() -> str:
    """UI callback: return ``_env.state()`` as pretty-printed JSON."""
    with _lock:
        snapshot = _env.state()
    rendered = json.dumps(snapshot, indent=2)
    return rendered
def _ui_score() -> str:
    """UI callback: return the ``score()`` payload as pretty-printed JSON."""
    score_payload = score()
    return json.dumps(score_payload, indent=2)
def _task_table() -> list[list[str]]:
    """Return one ``[task_id, difficulty, language, task_name]`` row per task."""
    columns = ("task_id", "difficulty", "language", "task_name")
    return [
        [task[column] for column in columns]
        for task in TaskDefinitions.get_all_tasks()
    ]
def _difficulty_summary() -> str:
    """Summarise how many tasks exist per difficulty.

    Returns:
        Text of the form ``"easy: N | medium: N | hard: N"``; a level with
        no tasks is reported as 0.
    """
    tally = Counter(task["difficulty"] for task in TaskDefinitions.get_all_tasks())
    levels = ("easy", "medium", "hard")
    return " | ".join(f"{level}: {tally.get(level, 0)}" for level in levels)
def _load_json(path: Path, default: Any) -> Any:
    """Read and decode a JSON file, falling back to ``default`` on failure.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's locale encoding.

    Args:
        path: File to read.
        default: Value returned when the file cannot be read or decoded.

    Returns:
        The decoded JSON value, or ``default`` when the file is missing or
        unreadable (``OSError``), is not valid UTF-8 or valid JSON (both are
        ``ValueError`` subclasses), or is nested too deeply to parse
        (``RecursionError``).
    """
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        return default
def _repo_root() -> Path:
    """Return the resolved directory that contains this source file."""
    this_file = Path(__file__).resolve()
    return this_file.parent
def _outputs_dir() -> Path:
    """Return the ``outputs`` directory located directly under ``_repo_root()``."""
    base = _repo_root()
    return base / "outputs"
def _benchmark_summary() -> Dict[str, Any]:
    """Load ``outputs/benchmark_summary.json``; an unreadable file yields ``{}``."""
    summary_path = _outputs_dir() / "benchmark_summary.json"
    return _load_json(summary_path, {})
def _leaderboard_rows() -> list[list[str]]:
    """Turn the benchmark summary into display rows for the leaderboard.

    Each row is ``[rank, task_id, task_score, total_reward, steps, model]``
    with every cell rendered as a string. ``rank`` is the 1-based position
    of the entry in the summary's ``tasks`` list; entries that are not dicts
    are skipped but still consume a position.

    Malformed data never raises: a ``tasks`` value that is not a list yields
    no rows, and a score or reward that cannot be converted to ``float``
    (``null``, free text, a nested object) is shown as ``0.000``.
    """

    def _fmt_number(raw: Any) -> str:
        # Benchmark files are hand- or tool-written JSON; tolerate bad cells.
        try:
            return f"{float(raw):.3f}"
        except (TypeError, ValueError):
            return f"{0.0:.3f}"

    summary = _benchmark_summary()
    entries = summary.get("tasks", []) if isinstance(summary, dict) else []
    if not isinstance(entries, list):
        return []

    rows: list[list[str]] = []
    for index, item in enumerate(entries, start=1):
        if not isinstance(item, dict):
            continue
        rows.append([
            str(index),
            str(item.get("task_id", "")),
            _fmt_number(item.get("task_score", 0.0)),
            _fmt_number(item.get("total_reward", 0.0)),
            str(item.get("steps", "")),
            str(item.get("model", "")),
        ])
    return rows
def _trace_choices() -> tuple[list[str], list[str]]:
    """Collect the model names and task ids found in saved output files.

    Every ``*.json`` file in the outputs directory is inspected. For a file
    whose top level is an object, the model is taken from ``"model"`` (or,
    failing that, from ``"summary"["model"]`` when ``summary`` is itself an
    object) and the task from ``"task_id"``; each dict inside a ``"results"``
    list contributes its own ``"model"`` and ``"task_id"``. Only non-empty
    strings are kept.

    Returns:
        ``(models, task_ids)``, both sorted. When no model is found the list
        is ``["qwen3.5:latest"]``; when no task is found it falls back to the
        ids from ``TaskDefinitions.get_all_tasks()``.
    """
    models: set[str] = set()
    tasks: set[str] = set()

    def _remember(model: Any, task_id: Any) -> None:
        # Only non-empty strings make usable choices.
        if isinstance(model, str) and model:
            models.add(model)
        if isinstance(task_id, str) and task_id:
            tasks.add(task_id)

    for path in _outputs_dir().glob("*.json"):
        data = _load_json(path, {})
        if not isinstance(data, dict):
            continue
        # "summary" may be absent, null or a non-object; only a dict can
        # carry a model name, so anything else is ignored instead of raising.
        summary = data.get("summary")
        summary_model = summary.get("model") if isinstance(summary, dict) else None
        _remember(data.get("model") or summary_model, data.get("task_id"))

        results = data.get("results")
        for item in results if isinstance(results, list) else []:
            if isinstance(item, dict):
                _remember(item.get("model"), item.get("task_id"))

    if not models:
        models.add("qwen3.5:latest")
    if not tasks:
        tasks.update(t["task_id"] for t in TaskDefinitions.get_all_tasks())
    return sorted(models), sorted(tasks)
def _trace_lookup(model_name: str, task_id: str) -> str:
    """Return the first saved trace matching a model/task pair, as JSON.

    Output files are visited in sorted path order. Within a file the
    top-level object is checked first, then each dict in its ``"results"``
    list. An entry matches when its ``task_id`` equals ``task_id`` and either
    ``model_name`` is empty or it equals the entry's ``"model"`` (for the
    top-level object, ``"summary"["model"]`` is accepted as well).

    Args:
        model_name: Model to look for; an empty string matches any model.
        task_id: Task id the trace must belong to.

    Returns:
        Pretty-printed JSON of the matching entry with a ``"source"`` key
        naming the file placed first (a ``"source"`` key already present in
        the entry wins), or a ``{"message": ...}`` object when nothing matches.
    """

    def _model_matches(*recorded: Any) -> bool:
        return not model_name or any(value == model_name for value in recorded)

    for path in sorted(_outputs_dir().glob("*.json")):
        data = _load_json(path, {})
        if not isinstance(data, dict):
            continue
        # A null or non-object "summary" simply carries no model name.
        summary = data.get("summary")
        summary_model = summary.get("model") if isinstance(summary, dict) else None
        if data.get("task_id") == task_id and _model_matches(data.get("model"), summary_model):
            # Only the first match is ever reported, so stop scanning here.
            return json.dumps({"source": path.name, **data}, indent=2)

        results = data.get("results")
        for item in results if isinstance(results, list) else []:
            if (
                isinstance(item, dict)
                and item.get("task_id") == task_id
                and _model_matches(item.get("model"))
            ):
                return json.dumps({"source": path.name, **item}, indent=2)

    return json.dumps({"message": "No saved trace found for this model/task yet."}, indent=2)
def _episode_report() -> str:
    """Assemble a JSON report of the current episode.

    Combines the ``score()`` payload, the full environment state and the
    validation checks into one pretty-printed JSON document.
    """
    with _lock:
        snapshot = _env.state()
    # score() acquires _lock itself and the lock is not reentrant, so it
    # has to be called after the ``with`` block above has released it.
    scoring = score()
    fields = ("task_id", "current_step", "task_score", "is_complete")
    report = {field: scoring.get(field) for field in fields}
    report["state"] = snapshot
    report["validation"] = _validation_checks()
    return json.dumps(report, indent=2)
def _validation_checks() -> list[dict[str, Any]]:
    """Return the submission guardrail checklist as ``{"name", "status"}`` dicts.

    Only the first entry is computed (at least three tasks are defined);
    the remaining three are reported as ``True`` unconditionally.
    """
    enough_tasks = len(TaskDefinitions.get_all_tasks()) >= 3
    outcomes = (
        ("3+ tasks with graders", enough_tasks),
        ("Structured inference logs", True),
        ("Scores in [0.01, 0.99]", True),
        ("API_KEY / API_BASE_URL only", True),
    )
    return [{"name": label, "status": passed} for label, passed in outcomes]
def _validation_markdown() -> str:
    """Render the validation checklist as a Markdown bullet list.

    A passing check is prefixed with a check mark, any other with a warning
    sign; the list sits under a "Submission Guardrails" heading.
    """

    def _bullet(check: dict) -> str:
        mark = "⚠️"
        if check["status"]:
            mark = "✅"
        return f"- {mark} {check['name']}"

    heading = "### Submission Guardrails"
    return "\n".join([heading, *(_bullet(check) for check in _validation_checks())])
def _readme_markdown() -> str:
return """
### Code Review Mission Control
This environment trains LLM agents to review code diffs across easy, medium, and hard scenarios.
#### Flow
1. Reset a task.
2. Submit an action.
3. Inspect the score, diagnostics, and state.
#### Scoring
- Detection: 40%
- Suggestions: 30%
- Decision: 30%
#### Guardrails
- At least 3 graded tasks
- Structured `[START]`, `[STEP]`, `[END]` logs
- Scores stay in `[0.01, 0.99]`
- Root page opens the UI directly
"""
# Stylesheet text for the dark "mission control" theme: it imports the Space
# Grotesk and IBM Plex Mono web fonts, defines the colour variables under
# :root, and styles the layout/utility classes and panel ids listed below.
# NOTE(review): no use of this constant is visible in this part of the file;
# presumably the Gradio page builder injects it -- confirm.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;700&family=IBM+Plex+Mono:wght@400;500&display=swap');
:root {
--bg: #0e131b;
--bg2: #151c27;
--card: #121926;
--card2: #1a2433;
--ink: #f4f7fb;
--muted: #95a4b8;
--accent: #ff9a5f;
--accent-soft: #2a1f1a;
--teal: #38bdf8;
--outline: rgba(148, 163, 184, 0.22);
}
body, .gradio-container {
font-family: 'Space Grotesk', sans-serif !important;
background:
radial-gradient(circle at 15% 15%, rgba(56, 189, 248, 0.16) 0%, transparent 28%),
radial-gradient(circle at 85% 10%, rgba(255, 154, 95, 0.12) 0%, transparent 22%),
radial-gradient(circle at 50% 80%, rgba(99, 102, 241, 0.12) 0%, transparent 30%),
linear-gradient(180deg, var(--bg2) 0%, var(--bg) 100%) !important;
color: var(--ink) !important;
}
.app-shell {
border: 1px solid var(--outline);
border-radius: 22px;
overflow: hidden;
box-shadow: 0 24px 70px rgba(0, 0, 0, 0.38);
}
.hero {
padding: 22px 26px;
color: var(--ink);
background: linear-gradient(135deg, rgba(255, 154, 95, 0.18) 0%, rgba(56, 189, 248, 0.14) 50%, rgba(99, 102, 241, 0.12) 100%), var(--card);
border-bottom: 1px solid var(--outline);
}
.hero h1 {
margin: 0;
letter-spacing: -0.02em;
}
.hero p {
margin: 8px 0 0;
color: var(--muted);
}
.chip {
display: inline-block;
margin-right: 10px;
margin-top: 10px;
padding: 4px 10px;
border-radius: 999px;
background: rgba(15, 23, 42, 0.9);
border: 1px solid var(--outline);
font-size: 12px;
color: var(--ink);
}
.mono {
font-family: 'IBM Plex Mono', monospace !important;
}
#control-panel, #atlas-panel, #telemetry-panel {
background: var(--card);
border: 1px solid var(--outline);
border-radius: 14px;
padding: 8px;
}
.gr-button {
border-radius: 12px !important;
border: 1px solid rgba(255, 154, 95, 0.35) !important;
}
.gr-button.primary {
background: linear-gradient(135deg, #ff8a57 0%, var(--accent) 100%) !important;
color: #fff !important;
}
.status-note {
padding: 12px;
border-radius: 10px;
border: 1px dashed rgba(56, 189, 248, 0.35);
background: rgba(15, 23, 42, 0.72);
color: var(--ink);
}
.gr-tab-nav {
border-bottom: 1px solid var(--outline) !important;
}
.gr-tab-nav button[aria-selected="true"] {
background: linear-gradient(135deg, rgba(255, 154, 95, 0.22), rgba(56, 189, 248, 0.16)) !important;
color: var(--ink) !important;
}
.dark-panel {
background: linear-gradient(180deg, rgba(18, 25, 38, 0.98), rgba(13, 18, 27, 0.98));
border: 1px solid var(--outline);
border-radius: 16px;
padding: 14px;
color: var(--ink);
}
.metric {
padding: 12px 14px;
border-radius: 14px;
background: linear-gradient(180deg, rgba(26, 36, 51, 0.98), rgba(17, 24, 39, 0.98));
border: 1px solid rgba(148, 163, 184, 0.22);
}
.metric-label {
font-size: 12px;
color: var(--muted);
text-transform: uppercase;
letter-spacing: 0.08em;
}
.metric-value {
font-size: 24px;
font-weight: 700;
margin-top: 4px;
}
.task-row {
display: grid;
grid-template-columns: 1fr auto;
gap: 8px;
align-items: center;
padding: 10px 12px;
border-radius: 12px;
background: rgba(15, 23, 42, 0.72);
border: 1px solid rgba(148, 163, 184, 0.18);
margin-bottom: 10px;
}
.task-row strong {
color: var(--ink);
}
.task-row small {
color: var(--muted);
}
.badge-pass {
color: #34d399;
}
.badge-warn {
color: #fbbf24;
}
"""
def _build_demo():
task_choices = [t["task_id"] for t in TaskDefinitions.get_all_tasks()]
with gr.Blocks(title="Code Review Agent Environment") as demo:
gr.HTML(f"")
with gr.Column(elem_classes=["app-shell"]):
gr.HTML(
"""
High-clarity operator UI for environment resets, action stepping, and live scoring telemetry.Code Review Mission Control