import json
from typing import Any, Callable
import gradio as gr
def create_custom_ui(env: Any, action_cls: Any, observation_cls: Any) -> gr.Blocks:
    """Build the PrivilegeDesk demo page as a Gradio ``Blocks`` app.

    The page consists of a header, a row of stat tiles, an interactive
    panel (task / difficulty / seed selectors, a tool picker, a JSON
    arguments box and an episode log) and static reference panels.

    Args:
        env: Not referenced anywhere in this function body.
        action_cls: Not referenced anywhere in this function body.
        observation_cls: Not referenced anywhere in this function body.

    Returns:
        The constructed ``gr.Blocks`` instance.

    Note:
        The button handlers do not use ``env``; they POST to the
        ``/reset``, ``/step`` and ``/grader`` endpoints at the hard-coded
        address ``http://127.0.0.1:8000``.
    """
    # A dark, cyber-security focused theme mapping closely to Voyager's,
    # but tailored for PrivilegeDesk (blues, purples, neon green).
    # Every light-mode fill below has a matching ``*_dark`` override set to
    # the same token, so the page looks identical in both colour modes.
    theme = gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="blue",
        neutral_hue="slate",
    ).set(
        body_background_fill="*neutral_950",
        body_background_fill_dark="*neutral_950",
        block_background_fill="*neutral_900",
        block_background_fill_dark="*neutral_900",
        block_border_color="*neutral_800",
        block_border_color_dark="*neutral_800",
        block_label_background_fill="*neutral_800",
        button_primary_background_fill="*primary_600",
        button_primary_background_fill_dark="*primary_600",
        button_secondary_background_fill="*primary_800",
        button_secondary_background_fill_dark="*primary_800",
        border_color_primary="*neutral_800",
        background_fill_secondary="*neutral_900",
        color_accent="*primary_500",
    )
custom_css = """
/* VOYAGER Premium Styling Clones */
.gradio-container {
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
}
#header-container {
text-align: center;
margin-bottom: 2rem;
padding-top: 2rem;
}
#header-title {
font-size: 2.5rem;
font-weight: 800;
background: linear-gradient(90deg, #818cf8, #3b82f6);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin-bottom: 0.5rem;
display: flex;
justify-content: center;
align-items: center;
gap: 12px;
}
#header-subtitle {
color: #94a3b8;
font-size: 1.1rem;
font-weight: 400;
}
/* Stats Row styling */
.stat-box {
background: #1e293b;
border-radius: 8px;
padding: 1.25rem;
text-align: center;
border: 1px solid #334155;
box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1);
}
.stat-number {
font-size: 1.8rem;
font-weight: 700;
color: #818cf8;
margin-bottom: 0.25rem;
}
.stat-label {
font-size: 0.75rem;
font-weight: 600;
color: #64748b;
text-transform: uppercase;
letter-spacing: 0.05em;
}
/* Episode Log Console */
#episode-log {
background-color: #0f172a;
color: #a78bfa;
font-family: 'JetBrains Mono', 'Fira Code', monospace;
padding: 1rem;
border: 1px solid #1e293b;
border-radius: 8px;
height: 250px;
overflow-y: auto;
}
#episode-log.yellow-text { color: #facc15; }
/* Tool blocks styling */
.tool-group {
background-color: #1e293b;
border-radius: 6px;
padding: 10px;
margin-bottom: 10px;
border-left: 3px solid #818cf8;
}
.tool-group h4 {
color: #94a3b8;
margin-top: 0;
margin-bottom: 8px;
font-size: 0.85rem;
text-transform: uppercase;
}
.tool-tag {
display: inline-block;
background: #0f172a;
color: #38bdf8;
padding: 3px 8px;
border-radius: 4px;
font-size: 0.8rem;
margin: 2px;
font-family: 'JetBrains Mono', monospace;
}
"""
with gr.Blocks(theme=theme, css=custom_css, title="PrivilegeDesk - OpenEnv") as demo:
# ─── HEADER ───────────────────────────────────────────────────────────
with gr.Column(elem_id="header-container"):
gr.HTML("""
""")
# ─── STATS ROW ────────────────────────────────────────────────────────
with gr.Row():
def make_stat_box(val, label):
    """Return the HTML for one stat tile showing ``val`` above ``label``.

    The markup uses the ``stat-box``, ``stat-number`` and ``stat-label``
    classes declared in the page stylesheet.

    Args:
        val: Value to display; converted with ``str`` and HTML-escaped.
        label: Caption to display; converted with ``str`` and HTML-escaped.

    Returns:
        An HTML fragment as a string.
    """
    from html import escape

    # Escape both fields so a value containing "<" or "&" cannot break the tile.
    return (
        '<div class="stat-box">'
        f'<div class="stat-number">{escape(str(val))}</div>'
        f'<div class="stat-label">{escape(str(label))}</div>'
        "</div>"
    )
gr.HTML(make_stat_box("19", "Tools"))
gr.HTML(make_stat_box("6", "Categories"))
gr.HTML(make_stat_box("18", "Reward Signals"))
gr.HTML(make_stat_box("3", "Difficulty Levels"))
gr.HTML(make_stat_box("3", "Tasks (Easy, Med, Hard)"))
# We use state to hold cumulative reward / step
step_disp = gr.HTML(make_stat_box("-", "Current Step"))
reward_disp = gr.HTML(make_stat_box("-", "Cum. Reward"))
# ─── MAIN INTERFACE ───────────────────────────────────────────────────
with gr.Row():
# LEFT COLUMN: Interactive Demo
with gr.Column(scale=2):
gr.Markdown("### 🎮 Interactive Demo – Try the Environment")
with gr.Row():
task_dropdown = gr.Dropdown(
choices=["access_decision", "jit_escalation", "access_review"],
value="access_decision",
label="Task",
container=False,
scale=3
)
difficulty_dropdown = gr.Dropdown(
choices=["Difficulty 1", "Difficulty 2", "Difficulty 3"],
value="Difficulty 1",
label="Difficulty",
container=False,
scale=2
)
seed_input = gr.Number(value=42, label="Seed", precision=0, container=False, scale=1)
reset_btn = gr.Button("🔄 Reset Episode", variant="huggingface") # Blue/Indigo variant
step_btn = gr.Button("▶ Step", variant="primary") # Green accent variant
with gr.Row():
tool_dropdown = gr.Dropdown(
choices=[
"policy.lookup", "policy.list",
"org.get_user", "org.get_manager", "org.list_users",
"entitlement.list", "entitlement.inspect", "entitlement.revoke",
"request.view", "request.list",
"approval.route", "approval.check_status",
"access.decide", "access.grant", "access.set_ttl",
"audit.query", "group.resolve", "workflow.check_active", "review.submit"
],
label="Tool Call",
scale=2
)
last_result = gr.Textbox(label="Last Tool Result", interactive=False, lines=1)
args_input = gr.Textbox(label="Arguments (JSON)", value="{}", lines=2)
gr.Markdown("**Episode Log**")
episode_log = gr.HTML('Press "Reset Episode" to generate a scenario...
')
# RIGHT COLUMN: Rewards & Overview
with gr.Column(scale=1):
gr.Markdown("### 🧠 PrivilegeDesk Agent Loop")
gr.HTML("""
Actor
LLM generates
→
Execute
Tool call
→
Reflect
Update memory
- Goal Inference – Read request/audit logs
- Policy Verification – Cross-reference IAM rules
- Action Execution – Approve/Deny/Revoke
""")
gr.Markdown("### 🎯 Grading Rubrics (Subgoals)")
gr.HTML("""
Task 1: Access Decision (Easy)
View Req → Check Entitlements → Policy Lookup → Decide
Task 2: JIT Escalation (Medium)
Route Approval → Attach Ticket → Set TTL → Grant
Task 3: Access Review (Hard)
Audit Usage → Resolve Groups → Test Workflows → Revoke Risks
""")
# ─── TOOL INVENTORY ───────────────────────────────────────────────────
gr.Markdown("### 🔧 Tool Inventory – 19 Tools")
gr.HTML("""
POLICY (2)
policy.lookup policy.list
ORGANIZATION (3)
org.get_user org.get_manager org.list_users
ENTITLEMENTS & GROUPS (4)
entitlement.list entitlement.inspect entitlement.revoke group.resolve
REQUEST & APPROVAL (4)
request.view request.list approval.route approval.check_status
ACCESS CONTROL (3)
access.decide access.grant access.set_ttl
AUDIT & WORKFLOW (3)
audit.query workflow.check_active review.submit
""")
# State definitions
# Local mirrors of the episode log, step count and cumulative reward.
# They are refreshed from the responses of the FastAPI endpoints so that
# the UI stays in sync with the server app instance.
state_log = gr.State([])
state_steps = gr.State(0)
state_reward = gr.State(0.0)
# ─── LOGIC BINDINGS ───────────────────────────────────────────────────
def format_log(logs):
    """Render log entries as the HTML shown in the episode-log console.

    Each entry becomes one line prefixed with ``> ``. The wrapper element
    carries the ``episode-log`` id styled by the page stylesheet.

    Args:
        logs: Iterable of log entries; each is converted with ``str``.

    Returns:
        An HTML fragment as a string. When ``logs`` is empty the fragment
        contains the placeholder ``Waiting for episode start...``.
    """
    from html import escape

    # Entries contain server responses and user-typed JSON, so escape them
    # before embedding in markup. ``join`` avoids repeated concatenation.
    body = "".join(f"&gt; {escape(str(entry))}<br>" for entry in logs)
    if not body:
        body = "Waiting for episode start..."
    return f'<div id="episode-log">{body}</div>'
def on_reset(task, diff, seed):
    """Start a new episode through the local ``/reset`` endpoint.

    Args:
        task: Task identifier sent as ``task_id``.
        diff: Difficulty label of the form ``"Difficulty N"``; ``N`` is sent
            as ``difficulty_level``.
        seed: Seed from the number input. ``None`` (cleared field) falls
            back to 42; any other value, including 0, is sent as an int.

    Returns:
        A 7-tuple in the order the reset button's outputs are wired:
        log HTML, log list, status text, step tile HTML, reward tile HTML,
        step count, cumulative reward.
    """
    import requests  # we will call our own endpoints locally

    def failed(message):
        # Failure leaves the UI in the "no episode" state.
        return (
            format_log([message]),
            [],
            "Failed to reset.",
            make_stat_box("-", "Current Step"),
            make_stat_box("-", "Cum. Reward"),
            0,
            0.0,
        )

    diff_level = int(diff.split(" ")[1])
    payload = {
        "task_id": task,
        "difficulty_level": diff_level,
        # ``is None`` rather than truthiness so that seed 0 is honoured.
        "seed": 42 if seed is None else int(seed),
    }

    try:
        # Timeout keeps the UI handler from hanging if the server stalls.
        res = requests.post("http://127.0.0.1:8000/reset", json=payload, timeout=30)
    except requests.RequestException as exc:
        return failed(f"Error resetting: {exc}")

    if res.status_code != 200:
        return failed(f"Error resetting: {res.text}")

    try:
        data = res.json()
    except ValueError:
        return failed("Error resetting: response was not valid JSON")

    init_obs = json.dumps(data.get("observation", {}), indent=2)
    logs = [f"Episode reset. Task: {task}", f"Observation: {init_obs}"]
    return (
        format_log(logs),
        logs,
        "Env reset successful.",
        make_stat_box("0", "Current Step"),
        make_stat_box("0.0", "Cum. Reward"),
        0,
        0.0,
    )
def on_step(tool_name, args_str, current_logs, current_steps, current_reward):
    """Send one tool call to the local ``/step`` endpoint and update the UI.

    Args:
        tool_name: Name of the selected tool; empty/``None`` is rejected
            locally without contacting the server.
        args_str: Tool arguments as a JSON string.
        current_logs: Log entries accumulated so far (not mutated).
        current_steps: Number of successful steps so far.
        current_reward: Cumulative reward so far.

    Returns:
        A 7-tuple in the order the step button's outputs are wired:
        log HTML, log list, status text, step tile HTML, reward tile HTML,
        step count, cumulative reward. On any failure the step count and
        reward are returned unchanged.
    """
    import requests

    def outcome(logs, status, steps, reward):
        # Single place that builds the 7-tuple so all branches stay aligned.
        return (
            format_log(logs),
            logs,
            status,
            make_stat_box(str(steps), "Current Step"),
            make_stat_box(f"{reward:.2f}", "Cum. Reward"),
            steps,
            reward,
        )

    def rejected(message, status):
        return outcome(current_logs + [message], status, current_steps, current_reward)

    # The tool dropdown has no default value, so nothing may be selected yet.
    if not tool_name:
        return rejected("Error: Select a tool before stepping", "No tool selected")

    try:
        args = json.loads(args_str)
    except (TypeError, ValueError):
        # JSONDecodeError is a ValueError; TypeError covers a None input.
        return rejected("Error: Arguments must be valid JSON", "Invalid JSON")

    try:
        res = requests.post(
            "http://127.0.0.1:8000/step",
            json={"action": {"tool_name": tool_name, "arguments": args}},
            timeout=30,
        )
    except requests.RequestException as exc:
        return rejected(f"Error on step: {exc}", "Step Failed")

    if res.status_code != 200:
        return rejected(f"Error on step: {res.text}", "Step Failed")

    try:
        data = res.json()
    except ValueError:
        return rejected("Error on step: response was not valid JSON", "Step Failed")

    obs = json.dumps(data.get("observation", {}), indent=2)
    # A present-but-null reward would break the running sum; treat it as 0.
    rew = data.get("reward") or 0.0
    done = data.get("done", False)

    step_val = current_steps + 1
    rew_val = current_reward + rew

    new_logs = current_logs + [
        f"Action: {tool_name}({args_str})",
        f"Reward: {rew} | Terminated: {done}",
        f"Observation: {obs[:300]}{'...' if len(obs) > 300 else ''}",
    ]

    if done:
        # Fetching the final score is best-effort: a grader failure is
        # logged but must not discard the step that just succeeded.
        try:
            grade_res = requests.post("http://127.0.0.1:8000/grader", timeout=30)
            if grade_res.status_code == 200:
                score = grade_res.json().get("score", 0.0)
                new_logs.append(f"Episode Done. Final Score: {score}")
            else:
                new_logs.append(
                    f"Episode Done. Grader returned HTTP {grade_res.status_code}"
                )
        except (requests.RequestException, ValueError) as exc:
            new_logs.append(f"Episode Done. Grader unavailable: {exc}")

    return outcome(new_logs, "Step successful.", step_val, rew_val)
# Wire the buttons to their handlers. Both handlers return seven values in
# the same order as the ``outputs`` lists below: log HTML, log state,
# last-result text, step tile, reward tile, step state, reward state.
reset_btn.click(
on_reset,
inputs=[task_dropdown, difficulty_dropdown, seed_input],
outputs=[episode_log, state_log, last_result, step_disp, reward_disp, state_steps, state_reward]
)
# The step handler additionally receives the three state values so it can
# append to the log and accumulate the step count and reward.
step_btn.click(
on_step,
inputs=[tool_dropdown, args_input, state_log, state_steps, state_reward],
outputs=[episode_log, state_log, last_result, step_disp, reward_disp, state_steps, state_reward]
)
# Hand the assembled Blocks app back to the caller.
return demo