import html
import json
from typing import Any, Callable

import gradio as gr

# Base URL of the environment server that the UI callbacks POST to.
_API_BASE_URL = "http://127.0.0.1:8000"

# Upper bound (seconds) on every HTTP call so that a stalled server cannot
# hang a UI callback forever.
_REQUEST_TIMEOUT_S = 30

# Choices offered in the "Tool Call" dropdown, in display order.
_TOOL_CHOICES = [
    "policy.lookup", "policy.list",
    "org.get_user", "org.get_manager", "org.list_users",
    "entitlement.list", "entitlement.inspect", "entitlement.revoke",
    "request.view", "request.list",
    "approval.route", "approval.check_status",
    "access.decide", "access.grant", "access.set_ttl",
    "audit.query", "group.resolve", "workflow.check_active",
    "review.submit",
]

# (heading, tools) pairs rendered in the "Tool Inventory" section.
_TOOL_GROUPS = (
    ("POLICY", ("policy.lookup", "policy.list")),
    ("ORGANIZATION", ("org.get_user", "org.get_manager", "org.list_users")),
    (
        "ENTITLEMENTS & GROUPS",
        ("entitlement.list", "entitlement.inspect", "entitlement.revoke", "group.resolve"),
    ),
    (
        "REQUEST & APPROVAL",
        ("request.view", "request.list", "approval.route", "approval.check_status"),
    ),
    ("ACCESS CONTROL", ("access.decide", "access.grant", "access.set_ttl")),
    ("AUDIT & WORKFLOW", ("audit.query", "workflow.check_active", "review.submit")),
)

_CUSTOM_CSS = """
/* VOYAGER Premium Styling Clones */
.gradio-container {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
}
#header-container {
    text-align: center;
    margin-bottom: 2rem;
    padding-top: 2rem;
}
#header-title {
    font-size: 2.5rem;
    font-weight: 800;
    background: linear-gradient(90deg, #818cf8, #3b82f6);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 0.5rem;
    display: flex;
    justify-content: center;
    align-items: center;
    gap: 12px;
}
#header-subtitle {
    color: #94a3b8;
    font-size: 1.1rem;
    font-weight: 400;
}

/* Stats Row styling */
.stat-box {
    background: #1e293b;
    border-radius: 8px;
    padding: 1.25rem;
    text-align: center;
    border: 1px solid #334155;
    box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1);
}
.stat-number {
    font-size: 1.8rem;
    font-weight: 700;
    color: #818cf8;
    margin-bottom: 0.25rem;
}
.stat-label {
    font-size: 0.75rem;
    font-weight: 600;
    color: #64748b;
    text-transform: uppercase;
    letter-spacing: 0.05em;
}

/* Episode Log Console */
#episode-log {
    background-color: #0f172a;
    color: #a78bfa;
    font-family: 'JetBrains Mono', 'Fira Code', monospace;
    padding: 1rem;
    border: 1px solid #1e293b;
    border-radius: 8px;
    height: 250px;
    overflow-y: auto;
}
#episode-log.yellow-text {
    color: #facc15;
}

/* Tool blocks styling */
.tool-group {
    background-color: #1e293b;
    border-radius: 6px;
    padding: 10px;
    margin-bottom: 10px;
    border-left: 3px solid #818cf8;
}
.tool-group h4 {
    color: #94a3b8;
    margin-top: 0;
    margin-bottom: 8px;
    font-size: 0.85rem;
    text-transform: uppercase;
}
.tool-tag {
    display: inline-block;
    background: #0f172a;
    color: #38bdf8;
    padding: 3px 8px;
    border-radius: 4px;
    font-size: 0.8rem;
    margin: 2px;
    font-family: 'JetBrains Mono', monospace;
}
"""

# Header markup; the ids are the ones styled by _CUSTOM_CSS.
_HEADER_HTML = """
<div id="header-title">
PrivilegeDesk Environment
</div>
<div id="header-subtitle">
Interactive Simulated IAM &amp; Privilege Management Environment for RL Agents
</div>
"""

# NOTE(review): the two snippets below carry no markup, so a browser collapses
# their line breaks into running text -- confirm the intended layout.
_AGENT_LOOP_HTML = """

Actor
LLM generates → Execute
Tool call → Reflect
Update memory

Goal Inference – Read request/audit logs
Policy Verification – Cross-reference IAM rules
Action Execution – Approve/Deny/Revoke

"""

_RUBRICS_HTML = """

Task 1: Access Decision (Easy)
View Req → Check Entitlements → Policy Lookup → Decide

Task 2: JIT Escalation (Medium)
Route Approval → Attach Ticket → Set TTL → Grant

Task 3: Access Review (Hard)
Audit Usage → Resolve Groups → Test Workflows → Revoke Risks

"""


def _stat_box(val, label):
    """Return the HTML for one stat tile (classes match the ``.stat-*`` CSS rules)."""
    return (
        '<div class="stat-box">'
        f'<div class="stat-number">{val}</div>'
        f'<div class="stat-label">{label}</div>'
        "</div>"
    )


def _render_log(entries):
    """Return the episode-log pane HTML for a list of log lines.

    Every line is HTML-escaped: log lines contain server responses and
    user-typed JSON, which must be displayed as text, not parsed as markup.
    """
    if entries:
        body = "".join(
            # pre-wrap keeps the line breaks of the pretty-printed observation JSON.
            f'<div style="white-space: pre-wrap;">&gt; {html.escape(str(entry))}</div>'
            for entry in entries
        )
    else:
        body = "Waiting for episode start..."
    return f'<div id="episode-log">{body}</div>'


def _tool_inventory_html():
    """Return the tool-inventory HTML built from ``_TOOL_GROUPS``."""
    sections = []
    for heading, tools in _TOOL_GROUPS:
        tags = " ".join(f'<span class="tool-tag">{tool}</span>' for tool in tools)
        sections.append(
            '<div class="tool-group">\n'
            f"<h4>{html.escape(heading)} ({len(tools)})</h4>\n"
            f"<div>{tags}</div>\n"
            "</div>"
        )
    return "\n".join(sections)


def _post_json(path, payload=None):
    """POST ``payload`` as JSON to the environment server and decode the reply.

    Returns:
        A ``(data, error)`` pair. On success ``data`` is the decoded JSON
        object and ``error`` is ``None``; on any failure (connection error,
        timeout, non-200 status, undecodable or non-object body) ``data`` is
        ``None`` and ``error`` is a human-readable message.
    """
    import requests  # imported lazily, as the original callbacks did

    try:
        response = requests.post(
            f"{_API_BASE_URL}{path}", json=payload, timeout=_REQUEST_TIMEOUT_S
        )
    except requests.RequestException as exc:
        return None, str(exc)
    if response.status_code != 200:
        return None, response.text
    try:
        data = response.json()
    except ValueError as exc:
        return None, f"Invalid JSON in response: {exc}"
    if not isinstance(data, dict):
        return None, f"Unexpected response payload: {data!r}"
    return data, None


def create_custom_ui(env: Any, action_cls: Any, observation_cls: Any) -> gr.Blocks:
    """Build the PrivilegeDesk demo UI.

    The UI drives the environment through HTTP calls to ``/reset``, ``/step``
    and ``/grader`` on ``_API_BASE_URL`` and mirrors the episode log, step
    count and cumulative reward in ``gr.State`` components.

    Args:
        env: Not used by this function.
        action_cls: Not used by this function.
        observation_cls: Not used by this function.

    Returns:
        The assembled ``gr.Blocks`` application (not launched).
    """
    # Dark theme built on indigo / blue / slate hues.
    theme = gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="blue",
        neutral_hue="slate",
    ).set(
        body_background_fill="*neutral_950",
        body_background_fill_dark="*neutral_950",
        block_background_fill="*neutral_900",
        block_background_fill_dark="*neutral_900",
        block_border_color="*neutral_800",
        block_border_color_dark="*neutral_800",
        block_label_background_fill="*neutral_800",
        button_primary_background_fill="*primary_600",
        button_primary_background_fill_dark="*primary_600",
        button_secondary_background_fill="*primary_800",
        button_secondary_background_fill_dark="*primary_800",
        border_color_primary="*neutral_800",
        background_fill_secondary="*neutral_900",
        color_accent="*primary_500",
    )

    with gr.Blocks(theme=theme, css=_CUSTOM_CSS, title="PrivilegeDesk - OpenEnv") as demo:
        # ─── HEADER ───────────────────────────────────────────────────────────
        with gr.Column(elem_id="header-container"):
            gr.HTML(_HEADER_HTML)

        # ─── STATS ROW ────────────────────────────────────────────────────────
        with gr.Row():
            gr.HTML(_stat_box("19", "Tools"))
            gr.HTML(_stat_box("6", "Categories"))
            gr.HTML(_stat_box("18", "Reward Signals"))
            gr.HTML(_stat_box("3", "Difficulty Levels"))
            gr.HTML(_stat_box("3", "Tasks (Easy, Med, Hard)"))
            # Live tiles, re-rendered by the reset/step callbacks.
            step_disp = gr.HTML(_stat_box("-", "Current Step"))
            reward_disp = gr.HTML(_stat_box("-", "Cum. Reward"))

        # ─── MAIN INTERFACE ───────────────────────────────────────────────────
        with gr.Row():
            # LEFT COLUMN: Interactive Demo
            with gr.Column(scale=2):
                gr.Markdown("### 🎮 Interactive Demo – Try the Environment")
                with gr.Row():
                    task_dropdown = gr.Dropdown(
                        choices=["access_decision", "jit_escalation", "access_review"],
                        value="access_decision",
                        label="Task",
                        container=False,
                        scale=3,
                    )
                    difficulty_dropdown = gr.Dropdown(
                        choices=["Difficulty 1", "Difficulty 2", "Difficulty 3"],
                        value="Difficulty 1",
                        label="Difficulty",
                        container=False,
                        scale=2,
                    )
                    seed_input = gr.Number(
                        value=42, label="Seed", precision=0, container=False, scale=1
                    )
                    # Gradio's built-in "huggingface" button variant.
                    reset_btn = gr.Button("🔄 Reset Episode", variant="huggingface")
                    # Primary variant; filled with the theme's indigo primary hue.
                    step_btn = gr.Button("▶ Step", variant="primary")

                with gr.Row():
                    tool_dropdown = gr.Dropdown(
                        choices=list(_TOOL_CHOICES), label="Tool Call", scale=2
                    )
                    last_result = gr.Textbox(
                        label="Last Tool Result", interactive=False, lines=1
                    )

                args_input = gr.Textbox(label="Arguments (JSON)", value="{}", lines=2)

                gr.Markdown("**Episode Log**")
                episode_log = gr.HTML(
                    '<div id="episode-log">'
                    'Press "Reset Episode" to generate a scenario...'
                    "</div>"
                )

            # RIGHT COLUMN: Rewards & Overview
            with gr.Column(scale=1):
                gr.Markdown("### 🧠 PrivilegeDesk Agent Loop")
                gr.HTML(_AGENT_LOOP_HTML)
                gr.Markdown("### 🎯 Grading Rubrics (Subgoals)")
                gr.HTML(_RUBRICS_HTML)

        # ─── TOOL INVENTORY ───────────────────────────────────────────────────
        gr.Markdown("### 🔧 Tool Inventory – 19 Tools")
        gr.HTML(_tool_inventory_html())

        # Per-session mirror of the episode: log lines, step count, total reward.
        state_log = gr.State([])
        state_steps = gr.State(0)
        state_reward = gr.State(0.0)

        # ─── LOGIC BINDINGS ───────────────────────────────────────────────────
        def on_reset(task, diff, seed):
            """Start a new episode on the server and reset the mirrored state."""
            diff_level = int(diff.split(" ")[1])
            # Only a cleared field falls back to the default; 0 is a valid seed.
            seed_value = 42 if seed is None else int(seed)
            data, error = _post_json(
                "/reset",
                {"task_id": task, "difficulty_level": diff_level, "seed": seed_value},
            )
            if error is not None:
                return (
                    _render_log([f"Error resetting: {error}"]),
                    [],
                    "Failed to reset.",
                    _stat_box("-", "Current Step"),
                    _stat_box("-", "Cum. Reward"),
                    0,
                    0.0,
                )

            init_obs = json.dumps(data.get("observation", {}), indent=2)
            logs = [f"Episode reset. Task: {task}", f"Observation: {init_obs}"]
            return (
                _render_log(logs),
                logs,
                "Env reset successful.",
                _stat_box("0", "Current Step"),
                _stat_box("0.0", "Cum. Reward"),
                0,
                0.0,
            )

        def on_step(tool_name, args_str, current_logs, current_steps, current_reward):
            """Send one tool call to the server and append the outcome to the log."""

            def outputs(logs, status, steps, reward):
                # Same 7-tuple shape as the `outputs=` list wired below.
                return (
                    _render_log(logs),
                    logs,
                    status,
                    _stat_box(str(steps), "Current Step"),
                    _stat_box(f"{reward:.2f}", "Cum. Reward"),
                    steps,
                    reward,
                )

            try:
                args = json.loads(args_str)
            except (json.JSONDecodeError, TypeError):
                return outputs(
                    current_logs + ["Error: Arguments must be valid JSON"],
                    "Invalid JSON",
                    current_steps,
                    current_reward,
                )

            data, error = _post_json(
                "/step", {"action": {"tool_name": tool_name, "arguments": args}}
            )
            if error is not None:
                return outputs(
                    current_logs + [f"Error on step: {error}"],
                    "Step Failed",
                    current_steps,
                    current_reward,
                )

            obs = json.dumps(data.get("observation", {}), indent=2)
            # A null/missing reward counts as 0.0 so the running total stays numeric.
            rew = data.get("reward") or 0.0
            done = data.get("done", False)

            new_logs = current_logs + [
                f"Action: {tool_name}({args_str})",
                f"Reward: {rew} | Terminated: {done}",
                f"Observation: {obs[:300]}{'...' if len(obs) > 300 else ''}",
            ]

            if done:
                # Episode finished: ask the server for the final grade.
                grade, grade_error = _post_json("/grader")
                if grade is not None:
                    score = grade.get("score", 0.0)
                    new_logs.append(f"Episode Done. Final Score: {score}")
                else:
                    new_logs.append(f"Episode Done. Grader error: {grade_error}")

            return outputs(
                new_logs, "Step successful.", current_steps + 1, current_reward + rew
            )

        callback_outputs = [
            episode_log,
            state_log,
            last_result,
            step_disp,
            reward_disp,
            state_steps,
            state_reward,
        ]
        reset_btn.click(
            on_reset,
            inputs=[task_dropdown, difficulty_dropdown, seed_input],
            outputs=callback_outputs,
        )
        step_btn.click(
            on_step,
            inputs=[tool_dropdown, args_input, state_log, state_steps, state_reward],
            outputs=callback_outputs,
        )

    return demo