Spaces:

openenv-community
/

Sentinel

Sleeping

nihalaninihal Claude Opus 4.6 committed on Mar 8

Commit

e85e584

1 Parent(s): f20603d

Improve Gradio UI layout with sidebar controls, sub-tabs, and styled score widgets

- Add sidebar/main content layout with controls on left, content on right
- Replace JSON score outputs with styled HTML score cards
- Add format_scores_html and format_comparison_scores_html to chart_helpers
- Use CSS variables consistently across inspector and verdict HTML
- Add tooltips to all plots, interactive=False on DataFrames
- Widen container to 1600px, hide Gradio footer
- Use sub-tabs within each main tab for better organization

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (4) hide show

app.py +152 -118
chart_helpers.py +77 -13
inspector.py +10 -10
sentinel_theme.py +12 -1

app.py CHANGED Viewed

@@ -22,6 +22,8 @@ from chart_helpers import (
     build_attack_timeline_df,
     build_comparison_df,
     build_verdict_html,
 )
 from inspector import (
     get_all_customers,
@@ -41,12 +43,13 @@ def run_single_episode(seed, trained):
     """Run a single episode and return formatted replay + charts."""
     log, scores = run_episode(trained=bool(trained), seed=int(seed))
     html = format_replay_html(log, scores)
-    scores_text = json.dumps(scores, indent=2)
     score_df = build_score_progression_df(log)
     attack_df = build_attack_timeline_df(log)
-    return html, scores_text, score_df, attack_df
 def run_before_after(seed):
@@ -71,18 +74,9 @@ def run_before_after(seed):
     untrained_score_df = build_score_progression_df(result["untrained"]["log"])
     trained_score_df = build_score_progression_df(result["trained"]["log"])
-    comparison_json = {
-        "untrained_scores": result["untrained"]["scores"],
-        "trained_scores": result["trained"]["scores"],
-        "improvement": {
-            agent: round(
-                result["trained"]["scores"][agent]
-                - result["untrained"]["scores"][agent],
-                2,
-            )
-            for agent in result["trained"]["scores"]
-        },
-    }
     return (
         untrained_html,
@@ -91,7 +85,7 @@ def run_before_after(seed):
         comparison_df,
         untrained_score_df,
         trained_score_df,
-        json.dumps(comparison_json, indent=2),
     )
@@ -113,7 +107,7 @@ def inspect_state(seed):
 # Gradio UI
 # -------------------------------------------------------------------
-with gr.Blocks(title="SentinelOps Arena") as demo:
     # Header banner
     gr.HTML(HEADER_HTML)
@@ -124,38 +118,47 @@ with gr.Blocks(title="SentinelOps Arena") as demo:
         # ============================================================
         with gr.TabItem("Run Episode"):
             with gr.Row():
-                seed_input = gr.Number(
-                    value=42, label="Random Seed", precision=0
-                )
-                trained_toggle = gr.Checkbox(
-                    value=False, label="Use Trained Worker"
-                )
-                run_btn = gr.Button("Run Episode", variant="primary")
-            with gr.Row():
-                with gr.Column(scale=2):
-                    replay_output = gr.HTML(label="Episode Replay")
-                with gr.Column(scale=1):
-                    scores_output = gr.Code(
-                        label="Final Scores", language="json"
-                    )
-            with gr.Accordion("Score Progression & Attack Timeline", open=True):
-                with gr.Row():
-                    score_plot = gr.LinePlot(
-                        x="tick",
-                        y="score",
-                        color="agent",
-                        label="Cumulative Score Progression",
-                        height=300,
                     )
-                    attack_plot = gr.BarPlot(
-                        x="attack_type",
-                        y="count",
-                        color="attack_type",
-                        label="Attack Timeline",
-                        height=300,
                     )
             run_btn.click(
                 run_single_episode,
@@ -167,50 +170,64 @@ with gr.Blocks(title="SentinelOps Arena") as demo:
         # Tab 2: Before/After Comparison
         # ============================================================
         with gr.TabItem("Untrained vs Trained"):
-            gr.Markdown(
-                "Compare how an **untrained** worker vs a **trained** worker "
-                "handles the same attack sequence."
-            )
-            with gr.Row():
-                comp_seed = gr.Number(
-                    value=42, label="Random Seed", precision=0
-                )
-                comp_btn = gr.Button("Run Comparison", variant="primary")
-            # Verdict stats
-            verdict_output = gr.HTML(label="Training Impact")
             with gr.Row():
-                untrained_output = gr.HTML(label="Untrained Worker")
-                trained_output = gr.HTML(label="Trained Worker")
-            with gr.Accordion("Score Comparison Charts", open=True):
-                comparison_bar = gr.BarPlot(
-                    x="agent",
-                    y="score",
-                    color="type",
-                    label="Score Comparison: Untrained vs Trained",
-                    height=300,
-                )
-                with gr.Row():
-                    untrained_score_plot = gr.LinePlot(
-                        x="tick",
-                        y="score",
-                        color="agent",
-                        label="Untrained Score Progression",
-                        height=250,
                     )
-                    trained_score_plot = gr.LinePlot(
-                        x="tick",
-                        y="score",
-                        color="agent",
-                        label="Trained Score Progression",
-                        height=250,
                     )
-            comparison_output = gr.Code(
-                label="Score Details", language="json"
-            )
             comp_btn.click(
                 run_before_after,
@@ -231,36 +248,53 @@ with gr.Blocks(title="SentinelOps Arena") as demo:
         # ============================================================
         with gr.TabItem("Environment Inspector"):
             with gr.Row():
-                inspect_seed = gr.Number(
-                    value=42, label="Random Seed", precision=0
-                )
-                inspect_btn = gr.Button("Inspect", variant="primary")
-            config_output = gr.HTML(label="Environment Configuration")
-            with gr.Accordion("Customers (CRM)", open=False):
-                customers_table = gr.Dataframe(
-                    label="All Customers",
-                    headers=["customer_id", "name", "tier", "region", "lifetime_value"],
-                )
-            with gr.Accordion("Invoices (Billing)", open=False):
-                invoices_table = gr.Dataframe(
-                    label="All Invoices",
-                    headers=["invoice_id", "customer_id", "amount", "status"],
-                )
-            with gr.Accordion("Tickets (Support)", open=False):
-                tickets_table = gr.Dataframe(
-                    label="All Tickets",
-                    headers=["ticket_id", "customer_id", "subject", "priority", "status", "sla_deadline_tick"],
-                )
-            with gr.Accordion("Task Queue", open=False):
-                tasks_table = gr.Dataframe(
-                    label="Task Queue",
-                    headers=["task_id", "customer_id", "task_type", "message", "arrival_tick"],
-                )
             inspect_btn.click(
                 inspect_state,

     build_attack_timeline_df,
     build_comparison_df,
     build_verdict_html,
+    format_scores_html,
+    format_comparison_scores_html,
 )
 from inspector import (
     get_all_customers,
     """Run a single episode and return formatted replay + charts."""
     log, scores = run_episode(trained=bool(trained), seed=int(seed))
     html = format_replay_html(log, scores)
+    scores_html = format_scores_html(scores)
     score_df = build_score_progression_df(log)
     attack_df = build_attack_timeline_df(log)
+    return html, scores_html, score_df, attack_df
 def run_before_after(seed):
     untrained_score_df = build_score_progression_df(result["untrained"]["log"])
     trained_score_df = build_score_progression_df(result["trained"]["log"])
+    comparison_html = format_comparison_scores_html(
+        result["untrained"]["scores"], result["trained"]["scores"]
+    )
     return (
         untrained_html,
         comparison_df,
         untrained_score_df,
         trained_score_df,
+        comparison_html,
     )
 # Gradio UI
 # -------------------------------------------------------------------
+with gr.Blocks(title="SentinelOps Arena", fill_width=True) as demo:
     # Header banner
     gr.HTML(HEADER_HTML)
         # ============================================================
         with gr.TabItem("Run Episode"):
             with gr.Row():
+                # Left sidebar for controls
+                with gr.Column(scale=1, min_width=300):
+                    gr.Markdown("### Episode Configuration")
+                    seed_input = gr.Number(
+                        value=42, label="Random Seed", precision=0,
+                        info="Seed for generating customer scenarios and attack patterns."
                     )
+                    trained_toggle = gr.Checkbox(
+                        value=False, label="Use Trained Worker",
+                        info="Toggle to use a worker trained via GRPO instead of a naive heuristic worker."
                     )
+                    run_btn = gr.Button("▶ Run Episode", variant="primary", size="lg")
+                    gr.Markdown("---")
+                    gr.Markdown("### Final Scores")
+                    scores_output = gr.HTML(elem_classes=["glow-card"])
+                # Main content area
+                with gr.Column(scale=3):
+                    with gr.Tabs():
+                        with gr.TabItem("Execution Replay"):
+                            replay_output = gr.HTML(elem_classes=["glow-card"])
+                        with gr.TabItem("Analytics & Timeline"):
+                            with gr.Row():
+                                score_plot = gr.LinePlot(
+                                    x="tick",
+                                    y="score",
+                                    color="agent",
+                                    title="Cumulative Score Progression",
+                                    tooltip=["tick", "score", "agent"],
+                                    height=350,
+                                )
+                            with gr.Row():
+                                attack_plot = gr.BarPlot(
+                                    x="attack_type",
+                                    y="count",
+                                    color="attack_type",
+                                    title="Attack Timeline",
+                                    tooltip=["attack_type", "count"],
+                                    height=350,
+                                )
             run_btn.click(
                 run_single_episode,
         # Tab 2: Before/After Comparison
         # ============================================================
         with gr.TabItem("Untrained vs Trained"):
             with gr.Row():
+                with gr.Column(scale=1, min_width=300):
+                    gr.Markdown(
+                        "### Benchmarking Mode\n"
+                        "Compare how an **untrained** worker vs a **trained** worker "
+                        "handles the same attack sequence."
                     )
+                    comp_seed = gr.Number(
+                        value=42, label="Random Seed", precision=0,
+                        info="Ensures identical attack sequence for fair comparison."
                     )
+                    comp_btn = gr.Button("▶ Run Comparison", variant="primary", size="lg")
+                    gr.Markdown("---")
+                    gr.Markdown("### Training Impact")
+                    verdict_output = gr.HTML(elem_classes=["glow-card"])
+                    comparison_output = gr.HTML(elem_classes=["glow-card"])
+                with gr.Column(scale=3):
+                    with gr.Tabs():
+                        with gr.TabItem("Execution Replays"):
+                            with gr.Row():
+                                with gr.Column():
+                                    gr.Markdown("#### 🛑 Untrained Worker")
+                                    untrained_output = gr.HTML(elem_classes=["glow-card"])
+                                with gr.Column():
+                                    gr.Markdown("#### 🚀 Trained Worker")
+                                    trained_output = gr.HTML(elem_classes=["glow-card"])
+                        with gr.TabItem("Score Analytics"):
+                            with gr.Row():
+                                comparison_bar = gr.BarPlot(
+                                    x="agent",
+                                    y="score",
+                                    color="type",
+                                    title="Score Comparison: Untrained vs Trained",
+                                    tooltip=["agent", "score", "type"],
+                                    height=350,
+                                )
+                            with gr.Row():
+                                with gr.Column():
+                                    untrained_score_plot = gr.LinePlot(
+                                        x="tick",
+                                        y="score",
+                                        color="agent",
+                                        title="Untrained Score Progression",
+                                        tooltip=["tick", "score", "agent"],
+                                        height=300,
+                                    )
+                                with gr.Column():
+                                    trained_score_plot = gr.LinePlot(
+                                        x="tick",
+                                        y="score",
+                                        color="agent",
+                                        title="Trained Score Progression",
+                                        tooltip=["tick", "score", "agent"],
+                                        height=300,
+                                    )
             comp_btn.click(
                 run_before_after,
         # ============================================================
         with gr.TabItem("Environment Inspector"):
             with gr.Row():
+                with gr.Column(scale=1, min_width=300):
+                    gr.Markdown(
+                        "### System Databases\n"
+                        "Inspect the initial state of the simulated enterprise."
+                    )
+                    inspect_seed = gr.Number(
+                        value=42, label="Random Seed", precision=0,
+                        info="Seed used for procedural generation of records."
+                    )
+                    inspect_btn = gr.Button("🔍 Inspect Databases", variant="primary", size="lg")
+                    gr.Markdown("---")
+                    config_output = gr.HTML(elem_classes=["glow-card"])
+                with gr.Column(scale=3):
+                    with gr.Tabs():
+                        with gr.TabItem("CRM System (Customers)"):
+                            customers_table = gr.Dataframe(
+                                label="Customer Database",
+                                headers=["customer_id", "name", "tier", "region", "lifetime_value"],
+                                interactive=False,
+                                elem_classes=["glow-card"]
+                            )
+                        with gr.TabItem("Billing System (Invoices)"):
+                            invoices_table = gr.Dataframe(
+                                label="Invoice Database",
+                                headers=["invoice_id", "customer_id", "amount", "status"],
+                                interactive=False,
+                                elem_classes=["glow-card"]
+                            )
+                        with gr.TabItem("Ticketing System (Support)"):
+                            tickets_table = gr.Dataframe(
+                                label="Active Tickets",
+                                headers=["ticket_id", "customer_id", "subject", "priority", "status", "sla_deadline_tick"],
+                                interactive=False,
+                                elem_classes=["glow-card"]
+                            )
+                        with gr.TabItem("Live Task Queue"):
+                            tasks_table = gr.Dataframe(
+                                label="Tasks to Process",
+                                headers=["task_id", "customer_id", "task_type", "message", "arrival_tick"],
+                                interactive=False,
+                                elem_classes=["glow-card"]
+                            )
             inspect_btn.click(
                 inspect_state,

chart_helpers.py CHANGED Viewed

@@ -9,6 +9,72 @@ from __future__ import annotations
 import pandas as pd
 def build_score_progression_df(log: list[dict]) -> pd.DataFrame:
     """Track cumulative scores for each agent at each tick.
@@ -122,29 +188,27 @@ def build_verdict_html(untrained_log: list, trained_log: list) -> str:
         diff_sign = "+" if diff > 0 else ""
         return (
             f"<div style='flex:1; text-align:center; padding:16px; "
-            f"background:#111827; border-radius:12px; margin:4px;'>"
-            f"<div style='font-size:12px; color:#888; text-transform:uppercase; "
             f"letter-spacing:1px;'>{label}</div>"
-            f"<div style='display:flex; justify-content:center; gap:24px; margin-top:8px;'>"
             f"<div>"
-            f"<div style='font-size:28px; font-weight:bold; color:#ff4444;'>{untrained_val}</div>"
-            f"<div style='font-size:10px; color:#888;'>Untrained</div>"
             f"</div>"
             f"<div>"
-            f"<div style='font-size:28px; font-weight:bold; color:#00ff41;'>{trained_val}</div>"
-            f"<div style='font-size:10px; color:#888;'>Trained</div>"
             f"</div>"
             f"</div>"
-            f"<div style='font-size:14px; color:{diff_color}; margin-top:6px; "
-            f"font-weight:bold;'>{diff_sign}{diff}</div>"
             f"</div>"
         )
     html = (
-        "<div style='font-family:system-ui,sans-serif; padding:12px;'>"
-        "<h3 style='text-align:center; color:#e0e0e0; margin-bottom:12px;'>"
-        "Training Impact Verdict</h3>"
-        "<div style='display:flex; gap:8px;'>"
     )
     html += _stat_card(
         "Attacks Launched",

 import pandas as pd
def format_comparison_scores_html(untrained: dict, trained: dict) -> str:
    """Render untrained-vs-trained scores as a column of styled HTML cards.

    One card is produced per agent, showing the untrained score, the trained
    score, and the signed difference (trained minus untrained).

    Args:
        untrained: Mapping of agent name to numeric score for the untrained run.
        trained: Mapping of agent name to numeric score for the trained run.

    Returns:
        An HTML fragment: a flex-column wrapper containing one card per agent.
        Agents appear in ``untrained`` order, followed by any agents that only
        exist in ``trained``. A score missing from one side is shown as 0.0
        instead of raising ``KeyError``.
    """
    # Local import keeps this block self-contained within the module.
    from html import escape

    colors = {
        "attacker": "var(--sentinel-red)",
        "worker": "var(--sentinel-blue)",
        "oversight": "var(--sentinel-green)",
    }
    # Ordered union of both key sets so neither side can be silently dropped.
    agents = list(dict.fromkeys([*untrained, *trained]))

    cards = []
    for agent in agents:
        color = colors.get(agent, "#888")
        u_score = untrained.get(agent, 0.0)
        t_score = trained.get(agent, 0.0)
        # Round to the displayed precision *before* choosing sign and colour,
        # otherwise a delta of 0.04 renders as a green "+0.0". Adding 0.0
        # normalises a negative zero so it never prints as "-0.0".
        diff = round(t_score - u_score, 1) + 0.0
        diff_color = "#44bb44" if diff > 0 else ("#ff4444" if diff < 0 else "#888")
        diff_sign = "+" if diff > 0 else ""
        # Agent names end up inside markup, so escape them defensively.
        label = escape(str(agent))
        cards.append(
            f"<div style='display:flex; flex-direction:column; padding:12px 16px; "
            f"background:var(--sentinel-surface); border:1px solid var(--sentinel-border); "
            f"border-radius:6px; border-left:4px solid {color};'>"
            f"<div style='font-family:\"IBM Plex Mono\", monospace; font-weight:bold; "
            f"text-transform:uppercase; letter-spacing:1px; margin-bottom:8px;'>{label}</div>"
            f"<div style='display:flex; justify-content:space-between; align-items:center;'>"
            f"<div style='font-family:\"IBM Plex Mono\", monospace;'>"
            f"<span style='color:#888; font-size:12px; margin-right:8px;'>UNTRAINED:</span>"
            f"<span style='font-weight:bold;'>{u_score:.1f}</span>"
            f"</div>"
            f"<div style='font-family:\"IBM Plex Mono\", monospace;'>"
            f"<span style='color:#888; font-size:12px; margin-right:8px;'>TRAINED:</span>"
            f"<span style='font-weight:bold; color:{color};'>{t_score:.1f}</span>"
            f"</div>"
            f"<div style='font-family:\"IBM Plex Mono\", monospace; font-weight:bold; color:{diff_color};'>"
            f"{diff_sign}{diff:.1f}"
            f"</div>"
            f"</div>"
            f"</div>"
        )

    return (
        "<div style='display:flex; flex-direction:column; gap:8px;'>"
        + "".join(cards)
        + "</div>"
    )
def format_scores_html(scores: dict) -> str:
    """Render final per-agent scores as a column of styled HTML rows.

    Args:
        scores: Mapping of agent name to numeric score. Known agents
            (``attacker``, ``worker``, ``oversight``) get their theme colour;
            any other name falls back to a neutral grey.

    Returns:
        An HTML fragment: a flex-column wrapper with one row per agent, in
        the mapping's iteration order, each score shown to one decimal place.
    """
    # Local import keeps this block self-contained within the module.
    from html import escape

    colors = {
        "attacker": "var(--sentinel-red)",
        "worker": "var(--sentinel-blue)",
        "oversight": "var(--sentinel-green)",
    }

    rows = []
    for agent, score in scores.items():
        color = colors.get(agent, "#888")
        # Agent names end up inside markup, so escape them defensively.
        label = escape(str(agent))
        rows.append(
            f"<div style='display:flex; justify-content:space-between; align-items:center; "
            f"padding:12px 16px; background:var(--sentinel-surface); border:1px solid var(--sentinel-border); "
            f"border-radius:6px; border-left:4px solid {color};'>"
            f"<span style='font-family:\"IBM Plex Mono\", monospace; font-weight:bold; "
            f"text-transform:uppercase; letter-spacing:1px;'>{label}</span>"
            f"<span style='font-family:\"IBM Plex Mono\", monospace; font-size:18px; "
            f"font-weight:bold; color:{color};'>{score:.1f}</span>"
            f"</div>"
        )

    # Join once rather than growing a string with += on every iteration.
    return (
        "<div style='display:flex; flex-direction:column; gap:8px;'>"
        + "".join(rows)
        + "</div>"
    )
 def build_score_progression_df(log: list[dict]) -> pd.DataFrame:
     """Track cumulative scores for each agent at each tick.
         diff_sign = "+" if diff > 0 else ""
         return (
             f"<div style='flex:1; text-align:center; padding:16px; "
+            f"background:var(--sentinel-surface); border-radius:8px; border:1px solid var(--sentinel-border); margin:4px;'>"
+            f"<div style='font-size:11px; color:var(--sentinel-text); text-transform:uppercase; "
             f"letter-spacing:1px;'>{label}</div>"
+            f"<div style='display:flex; justify-content:center; align-items:center; gap:24px; margin-top:12px;'>"
             f"<div>"
+            f"<div style='font-size:28px; font-weight:bold; color:var(--sentinel-red);'>{untrained_val}</div>"
+            f"<div style='font-size:10px; color:#888; text-transform:uppercase;'>Untrained</div>"
             f"</div>"
             f"<div>"
+            f"<div style='font-size:28px; font-weight:bold; color:var(--sentinel-green);'>{trained_val}</div>"
+            f"<div style='font-size:10px; color:#888; text-transform:uppercase;'>Trained</div>"
             f"</div>"
             f"</div>"
+            f"<div style='font-size:14px; color:{diff_color}; margin-top:12px; "
+            f"font-weight:bold;'>Difference: {diff_sign}{diff}</div>"
             f"</div>"
         )
     html = (
+        "<div style='font-family:\"IBM Plex Mono\", monospace; padding:12px;'>"
+        "<div style='display:flex; gap:16px;'>"
     )
     html += _stat_card(
         "Attacks Launched",

inspector.py CHANGED Viewed

@@ -82,15 +82,15 @@ def get_env_config_html(env: SentinelOpsArena) -> str:
     sla = env.ticketing.sla_rules.model_dump()
     css = (
-        "font-family: 'Courier New', monospace;"
-        "background: #0d1117;"
-        "color: #c9d1d9;"
         "padding: 20px;"
-        "border-radius: 10px;"
-        "border: 1px solid #30363d;"
     )
     heading_css = (
-        "color: #39ff14;"
         "font-size: 14px;"
         "font-weight: bold;"
         "margin: 16px 0 8px 0;"
@@ -105,17 +105,17 @@ def get_env_config_html(env: SentinelOpsArena) -> str:
     th_css = (
         "text-align: left;"
         "padding: 6px 12px;"
-        "border-bottom: 1px solid #30363d;"
-        "color: #58a6ff;"
         "font-size: 12px;"
     )
     td_css = (
         "padding: 6px 12px;"
-        "border-bottom: 1px solid #21262d;"
         "font-size: 13px;"
     )
     val_css = (
-        "color: #39ff14;"
         "font-weight: bold;"
     )

     sla = env.ticketing.sla_rules.model_dump()
     css = (
+        "font-family: 'IBM Plex Mono', monospace;"
+        "background: var(--sentinel-surface);"
+        "color: var(--sentinel-text);"
         "padding: 20px;"
+        "border-radius: 8px;"
+        "border: 1px solid var(--sentinel-border);"
     )
     heading_css = (
+        "color: var(--sentinel-green);"
         "font-size: 14px;"
         "font-weight: bold;"
         "margin: 16px 0 8px 0;"
     th_css = (
         "text-align: left;"
         "padding: 6px 12px;"
+        "border-bottom: 1px solid var(--sentinel-border);"
+        "color: var(--sentinel-blue);"
         "font-size: 12px;"
     )
     td_css = (
         "padding: 6px 12px;"
+        "border-bottom: 1px solid rgba(201, 209, 217, 0.1);"
         "font-size: 13px;"
     )
     val_css = (
+        "color: var(--sentinel-green);"
         "font-weight: bold;"
     )

sentinel_theme.py CHANGED Viewed

@@ -153,10 +153,21 @@ CUSTOM_CSS = """
     --sentinel-text: #c9d1d9;
 }
 /* ====================== GLOBAL ====================== */
 .gradio-container {
     background: var(--sentinel-bg) !important;
-    max-width: 1200px !important;
 }
 /* ====================== TAB HEADERS ====================== */

     --sentinel-text: #c9d1d9;
 }
+footer {
+    display: none !important;
+}
 /* ====================== GLOBAL ====================== */
 .gradio-container {
     background: var(--sentinel-bg) !important;
+    max-width: 100% !important;
+    padding: 0 !important;
+}
+.gradio-container > .main {
+    max-width: 1600px;
+    margin: 0 auto;
+    padding: 0 20px;
 }
 /* ====================== TAB HEADERS ====================== */