Spaces:

Rayugacodes
/

KernelX

Running

App Files Files Community

Rayugacodes committed 12 days ago

Commit

e019ca1

verified ·

1 Parent(s): 644149b

Full-screen UI + OpenEnv API tab (reset/step/state/stop)

Browse files

Files changed (1) hide show

app.py +266 -215

app.py CHANGED Viewed

@@ -1,10 +1,11 @@
 """
-KernelX — Interactive Kernel Scheduler Simulation
 AI-Powered Linux Scheduling with eBPF + SmolLM2-360M
 """
 import json
 import random
 import numpy as np
 import gradio as gr
 import plotly.graph_objects as go
@@ -20,6 +21,7 @@ IDX_CTX_SWITCHES = 8
 IDX_EXEC_NS = 4
 COLORS = {"baseline": "#6b7280", "heuristic": "#f59e0b", "ai": "#06b6d4"}
 def format_state(features):
     return " | ".join(
@@ -88,7 +90,6 @@ def load_data():
         from huggingface_hub import hf_hub_download
         path = hf_hub_download(repo_id="Rayugacodes/kernelx-training-data", filename="test.jsonl", repo_type="dataset")
         DATA = [json.loads(l) for l in open(path) if l.strip()][:5000]
-        print(f"Loaded {len(DATA)} transitions")
     except Exception:
         DATA = []
         for i in range(2000):
@@ -99,31 +100,83 @@ def load_data():
 load_data()
 # ---------------------------------------------------------------------------
-# Simulation engine
 # ---------------------------------------------------------------------------
-def run_full_simulation(n_steps):
-    n = int(n_steps)
-    recs = random.sample(DATA, min(n, len(DATA)))
-    results = {k: {"rewards": [], "latencies": [], "actions": [], "cum_rewards": []} for k in ["baseline", "heuristic", "ai"]}
-    prevs = {"baseline": 0., "heuristic": 0., "ai": 0.}
-    fns = {"baseline": baseline_action, "heuristic": heuristic_action, "ai": ai_action}
-    for rec in recs:
-        s, ns_raw = rec["state"], rec["next_state"]
-        for k, fn in fns.items():
-            a = fn(s)
-            ns = simulate_effect(s, ns_raw, a)
-            r = compute_reward(s, ns, a, prevs[k])
-            results[k]["rewards"].append(r)
-            results[k]["latencies"].append(ns[IDX_WAIT_US])
-            results[k]["actions"].append(a)
-            cum = (results[k]["cum_rewards"][-1] if results[k]["cum_rewards"] else 0) + r
-            results[k]["cum_rewards"].append(cum)
-            prevs[k] = a
-    return results, recs
 # ---------------------------------------------------------------------------
 # Charts
@@ -131,20 +184,18 @@ def run_full_simulation(n_steps):
 CHART_LAYOUT = dict(
     template="plotly_dark",
-    paper_bgcolor="#0f172a",
     plot_bgcolor="#1e293b",
-    font=dict(color="#e2e8f0", family="JetBrains Mono, monospace"),
     margin=dict(l=50, r=20, t=50, b=40),
     legend=dict(bgcolor="rgba(0,0,0,0.3)", bordercolor="#334155"),
 )
-LABELS = {"baseline": "Linux CFS (Default)", "heuristic": "Heuristic Rules", "ai": "AI Strategist (SmolLM2)"}
 def make_cumulative_chart(results):
     fig = go.Figure()
     for k in ["baseline", "heuristic", "ai"]:
         fig.add_trace(go.Scatter(y=results[k]["cum_rewards"], name=LABELS[k], line=dict(color=COLORS[k], width=2.5)))
-    fig.update_layout(**CHART_LAYOUT, title="Cumulative Reward Over Time", xaxis_title="Step", yaxis_title="Cumulative Reward", height=400)
     fig.add_hline(y=0, line_dash="dash", line_color="#475569", opacity=0.5)
     return fig
@@ -156,78 +207,82 @@ def make_latency_chart(results):
         if len(lat) >= window:
             smooth = np.convolve(lat, np.ones(window)/window, mode="valid")
             fig.add_trace(go.Scatter(y=smooth, name=LABELS[k], line=dict(color=COLORS[k], width=2.5)))
-    fig.update_layout(**CHART_LAYOUT, title="Rolling Average Latency (lower = better)", xaxis_title="Step", yaxis_title="Wait Time (us)", height=400)
     return fig
 def make_action_chart(results):
     fig = make_subplots(rows=1, cols=3, subplot_titles=[LABELS[k] for k in ["baseline", "heuristic", "ai"]])
     for i, k in enumerate(["baseline", "heuristic", "ai"], 1):
         fig.add_trace(go.Histogram(x=results[k]["actions"], nbinsx=40, marker_color=COLORS[k], opacity=0.8, showlegend=False), row=1, col=i)
-    fig.update_layout(**CHART_LAYOUT, title="Action Distribution", height=300)
     fig.update_xaxes(range=[-1.1, 1.1])
     return fig
 def make_summary_bars(results):
-    labels_list = [LABELS[k] for k in ["baseline", "heuristic", "ai"]]
-    colors_list = [COLORS[k] for k in ["baseline", "heuristic", "ai"]]
-    fig = make_subplots(rows=1, cols=3, subplot_titles=["Mean Reward (higher=better)", "Avg Latency (lower=better)", "Positive Reward %"])
-    rewards = [np.mean(results[k]["rewards"]) for k in ["baseline", "heuristic", "ai"]]
-    lats = [np.mean(results[k]["latencies"]) for k in ["baseline", "heuristic", "ai"]]
-    pos = [sum(1 for r in results[k]["rewards"] if r > 0)/len(results[k]["rewards"])*100 for k in ["baseline", "heuristic", "ai"]]
-    fig.add_trace(go.Bar(x=labels_list, y=rewards, marker_color=colors_list, showlegend=False, text=[f"{v:.2f}" for v in rewards], textposition="outside"), row=1, col=1)
-    fig.add_trace(go.Bar(x=labels_list, y=lats, marker_color=colors_list, showlegend=False, text=[f"{v:.1f}" for v in lats], textposition="outside"), row=1, col=2)
-    fig.add_trace(go.Bar(x=labels_list, y=pos, marker_color=colors_list, showlegend=False, text=[f"{v:.0f}%" for v in pos], textposition="outside"), row=1, col=3)
-    fig.update_layout(**CHART_LAYOUT, height=350)
-    return fig
 # ---------------------------------------------------------------------------
 # Gradio handlers
 # ---------------------------------------------------------------------------
 def simulate(n_steps):
-    results, recs = run_full_simulation(n_steps)
-    # Metrics
     base_r, heur_r, ai_r = np.mean(results["baseline"]["rewards"]), np.mean(results["heuristic"]["rewards"]), np.mean(results["ai"]["rewards"])
     base_l, ai_l = np.mean(results["baseline"]["latencies"]), np.mean(results["ai"]["latencies"])
     lat_imp = ((base_l - ai_l) / base_l * 100) if base_l > 0 else 0
     reward_imp = ((ai_r - base_r) / abs(base_r) * 100) if base_r != 0 else 0
-    summary_md = f"""
-### Results ({int(n_steps)} steps on real kernel telemetry)
 | | Linux CFS | Heuristic | **AI Strategist** |
 |---|---|---|---|
-| Mean Reward | {base_r:.4f} | {heur_r:.4f} | **{ai_r:.4f}** |
-| Avg Latency | {base_l:.1f}us | {np.mean(results['heuristic']['latencies']):.1f}us | **{ai_l:.1f}us** |
-| Latency Reduction | — | {((base_l - np.mean(results['heuristic']['latencies'])) / base_l * 100):.1f}% | **{lat_imp:.1f}%** |
-| Reward vs Baseline | — | {((heur_r - base_r) / abs(base_r) * 100):+.1f}% | **{reward_imp:+.1f}%** |
 """
-    return (
-        summary_md,
-        make_cumulative_chart(results),
-        make_latency_chart(results),
-        make_action_chart(results),
-        make_summary_bars(results),
-    )
 def explore_state(idx):
     rec = DATA[int(idx) % len(DATA)]
     s, ns_raw = rec["state"], rec["next_state"]
     a_b, a_h, a_ai = baseline_action(s), heuristic_action(s), ai_action(s)
-    ns_b = simulate_effect(s, ns_raw, a_b)
-    ns_h = simulate_effect(s, ns_raw, a_h)
-    ns_ai = simulate_effect(s, ns_raw, a_ai)
-    r_b = compute_reward(s, ns_b, a_b)
-    r_h = compute_reward(s, ns_h, a_h)
-    r_ai = compute_reward(s, ns_ai, a_ai)
     wait = s[IDX_WAIT_US]
     lat_imp = ((ns_b[IDX_WAIT_US] - ns_ai[IDX_WAIT_US]) / ns_b[IDX_WAIT_US] * 100) if ns_b[IDX_WAIT_US] > 0 else 0
@@ -238,226 +293,222 @@ def explore_state(idx):
         elif a > 0.05: return "slight demote"
         return "HOLD"
-    if wait > 50: reason = f"Very high latency ({wait:.0f}us) — aggressive priority boost to reduce scheduling delay."
-    elif wait > 15: reason = f"Elevated latency ({wait:.0f}us) — boosting priority to improve responsiveness."
     elif wait < 3: reason = f"Very low latency ({wait:.0f}us) — system healthy, minimal adjustment."
-    else: reason = f"Normal latency ({wait:.0f}us) — near-neutral action to maintain stability."
     md = f"""
-### Transition #{int(idx)}
 **PID** {rec['pid']} | **CPU** {rec['cpu']} | **Wait** {wait:.0f}us | **CSW** {s[IDX_CTX_SWITCHES]:.0f}
-`{format_state(s)}`
 | Strategy | Action | Decision | Result Latency | Reward |
 |---|---|---|---|---|
 | Linux CFS | {a_b:+.4f} | {meaning(a_b)} | {ns_b[IDX_WAIT_US]:.1f}us | {r_b:+.4f} |
 | Heuristic | {a_h:+.4f} | {meaning(a_h)} | {ns_h[IDX_WAIT_US]:.1f}us | {r_h:+.4f} |
 | **AI Strategist** | **{a_ai:+.4f}** | **{meaning(a_ai)}** | **{ns_ai[IDX_WAIT_US]:.1f}us** | **{r_ai:+.4f}** |
-**AI reduced latency by {lat_imp:.1f}%** vs Linux default.
-> **AI Reasoning:** {reason}
 """
-    # Mini chart: action comparison
     fig = go.Figure()
-    fig.add_trace(go.Bar(x=["Linux CFS", "Heuristic", "AI"], y=[a_b, a_h, a_ai],
                          marker_color=[COLORS["baseline"], COLORS["heuristic"], COLORS["ai"]],
                          text=[f"{a_b:+.2f}", f"{a_h:+.2f}", f"{a_ai:+.2f}"], textposition="outside"))
-    fig.update_layout(**CHART_LAYOUT, title="Action Comparison", yaxis_title="Action Value", height=280,
-                      yaxis_range=[-1.1, 0.5])
     fig.add_hline(y=0, line_dash="dash", line_color="#475569")
     return md, fig
 # ---------------------------------------------------------------------------
 # App
 # ---------------------------------------------------------------------------
 CSS = """
-.gradio-container { max-width: 1400px !important; }
 .dark { background-color: #0f172a !important; }
-h1 { color: #06b6d4 !important; font-family: 'JetBrains Mono', monospace !important; }
 h2, h3 { color: #e2e8f0 !important; }
-.metric-box { background: #1e293b; border: 1px solid #334155; border-radius: 8px; padding: 16px; text-align: center; }
-.metric-value { font-size: 2em; font-weight: bold; color: #06b6d4; }
-.metric-label { color: #94a3b8; font-size: 0.9em; }
 """
 with gr.Blocks(title="KernelX — AI Kernel Scheduler", css=CSS, theme=gr.themes.Base(primary_hue="cyan", neutral_hue="slate")) as app:
-    # Header
     gr.Markdown("""
-# KernelX
-### AI-Powered Linux Kernel Scheduler | eBPF + SmolLM2-360M | 44ms Inference
-Real-time scheduling optimization using reinforcement learning on live kernel telemetry.
-534K transitions collected via eBPF sentinel. Model trained with SFT + GRPO.
     """)
-    # Tab 1: Live Simulation
-    with gr.Tab("Simulation", id="sim"):
-        gr.Markdown("#### Compare AI Strategist vs Linux Default vs Heuristic on real kernel data")
         with gr.Row():
-            n_slider = gr.Slider(50, 2000, value=500, step=50, label="Simulation Steps", scale=3)
             run_btn = gr.Button("Run Simulation", variant="primary", scale=1, size="lg")
         summary = gr.Markdown()
-        with gr.Row():
             cumulative_plot = gr.Plot(label="Cumulative Reward")
-            latency_plot = gr.Plot(label="Latency Comparison")
         with gr.Row():
-            action_plot = gr.Plot(label="Action Distribution")
-        summary_bars = gr.Plot(label="Performance Summary")
-        run_btn.click(
-            fn=simulate, inputs=[n_slider],
-            outputs=[summary, cumulative_plot, latency_plot, action_plot, summary_bars]
-        )
-    # Tab 2: State Explorer
-    with gr.Tab("State Explorer", id="explore"):
-        gr.Markdown("#### Inspect individual kernel states and see how each strategy decides")
         with gr.Row():
-            idx_slider = gr.Slider(0, min(len(DATA)-1, 4999), value=0, step=1, label="Transition Index", scale=3)
-            explore_btn = gr.Button("Analyze", variant="primary", scale=1)
         with gr.Row():
-            state_md = gr.Markdown()
-            action_bar = gr.Plot(label="Action Comparison")
-        explore_btn.click(fn=explore_state, inputs=[idx_slider], outputs=[state_md, action_bar])
-    # Tab 3: RL Explanation
-    with gr.Tab("How RL Improves", id="rl"):
         gr.Markdown("""
-## Policy Iteration: How KernelX Gets Smarter
 ```
  COLLECT                    TRAIN                     DEPLOY
 ┌──────────┐           ┌──────────────┐          ┌──────────────┐
-│ Run live  │           │ SFT warm-    │          │ Hot-swap     │
 │ kernel    │ ────────> │ start +      │ ───────> │ GGUF model   │ ──┐
-│ w/ policy │  JSONL    │ GRPO RL      │  .gguf   │ in brain     │   │
 └──────────┘           └──────────────┘          └──────────────┘   │
      ^                                                               │
-     └───────────────── REPEAT with better policy ──────────────────┘
 ```
-### Iteration Progression
-| Iteration | Policy | Behavior | Expected Improvement |
-|:---------:|--------|----------|---------------------|
-| **0** | Linux CFS Default | No AI intervention. Generic scheduler. | Baseline |
-| **1** | SFT Warm-Start | Learns from heuristic labels. Matches rules. | Match heuristic |
-| **2** | GRPO on Iter 1 | Sees ACTUAL outcomes of its actions. | +10-20% over heuristic |
-| **3+** | GRPO on Iter 2+ | Recursive self-improvement. | Diminishing returns |
-### Why AI Beats the Default Scheduler
-The Linux **Completely Fair Scheduler (CFS)** is designed for *all possible workloads*.
-It has no knowledge of YOUR specific system's patterns.
-KernelX learns:
-- Which PIDs are latency-sensitive (and should be boosted)
-- When high context switches indicate CPU contention (and should be dampened)
-- How vruntime correlates with scheduling fairness for YOUR workload
-- Timing patterns that no hand-written heuristic captures
 ### Training Evidence
-| Metric | Before | After | Change |
-|--------|--------|-------|--------|
-| Training Loss | 2.05 | 0.28 | -86% |
-| Token Accuracy | 61% | 91% | +49% |
-| Format Compliance | 0% | 100% | — |
-| Inference Latency | — | 44ms | Sub-50ms target met |
-| Model Size | 1.4GB | 258MB | Q4_K_M quantization |
 ### Reward Function
-$$R_t = \\alpha \\cdot \\log(\\Delta_{exec} + 1) - \\beta \\cdot \\Delta_{wait} - \\gamma \\cdot |a_t - a_{t-1}|$$
-| Component | Weight | What it rewards |
-|-----------|--------|----------------|
-| Throughput | alpha=1.0 | CPU progress (more exec_runtime = good) |
-| Latency | beta=2.0 | Low wait time (penalizes increases) |
-| Stability | gamma=0.5 | Smooth actions (penalizes jitter) |
         """)
-    # Tab 4: Architecture
-    with gr.Tab("Architecture", id="arch"):
         gr.Markdown("""
-## KernelX System Architecture
 ```
-┌─────────────────────────────────────────────────────────────────────┐
-│                        LINUX KERNEL SPACE                           │
-│                                                                     │
-│   sched_switch ──> eBPF Sentinel ──> 24D Feature Vector             │
-│        │                                    │                       │
-│   priority_actions map <── BPF Ring Buffer ──┘                      │
-│        │                        │                                   │
-└────────│────────────────────────│───────────────────────────────────┘
-         │                        │
-         │              ┌─────────v──────────┐
-         │              │   RUST BRIDGE      │
-         │              │                    │
-         │              │  Ring Buffer ──> SHM (/dev/shm/kernelx_state)
-         │              │       │                                     │
-         │              │       └──> trajectories.jsonl               │
-         │              │                                             │
-         │              │  ZMQ Sub <── action weights                 │
-         │              └────────────────────┘
-         │                        │
-         │              ┌─────────v──────────┐
          │              │   PYTHON BRAIN     │
          │              │   (OpenEnv)        │
          │              │                    │
-         │              │  SHM ──> 10D features ──> SmolLM2-360M     │
-         │              │                              │              │
-         │              │  Action [-1, 1] <────────────┘              │
-         │              │       │                                     │
-         │              │       └──> ZMQ Pub ──> Bridge               │
          │              └────────────────────┘
-         │
-         └──── Kernel applies scheduling nudge at next sched_switch
 ```
-### Component Details
-| Component | Language | Role | Latency |
-|-----------|---------|------|---------|
-| eBPF Sentinel | C | Kernel telemetry extraction | <1us |
-| Rust Bridge | Rust | SHM sync + trajectory recording | <1ms |
-| Python Brain | Python | AI inference + OpenEnv server | 44ms |
-| SmolLM2-360M | GGUF | Scheduling decision model | 44ms |
-| Ratatui TUI | Rust | Real-time monitoring dashboard | 100ms refresh |
-### Data Flow
-| Step | Data | Format | Size |
-|------|------|--------|------|
-| Kernel -> Bridge | 24D telemetry | BPF ring buffer | 208 bytes/event |
-| Bridge -> Brain | Active state | Shared memory | 376 bytes |
-| Bridge -> Disk | Transitions | JSONL | ~300 bytes/line |
-| Brain -> Bridge | Action | ZMQ string | ~50 bytes |
-| Brain -> Kernel | Priority weight | BPF map | 8 bytes |
         """)
-    # Footer
     gr.Markdown("""
----
-[Model](https://huggingface.co/Rayugacodes/kernelx-strategist) |
-[Data](https://huggingface.co/datasets/Rayugacodes/kernelx-training-data) |
-[Colab](https://colab.research.google.com/github/pie-314/KernelX/blob/model-training-hugging-face-integration/KernelX_Training.ipynb) |
-[GitHub](https://github.com/pie-314/KernelX) |
-Built for Meta PyTorch OpenEnv Hackathon 2026
     """)
 app.launch(server_name="0.0.0.0", server_port=7860)

 """
+KernelX — Interactive Kernel Scheduler Simulation + OpenEnv API
 AI-Powered Linux Scheduling with eBPF + SmolLM2-360M
 """
 import json
 import random
+import uuid
 import numpy as np
 import gradio as gr
 import plotly.graph_objects as go
 IDX_EXEC_NS = 4
 COLORS = {"baseline": "#6b7280", "heuristic": "#f59e0b", "ai": "#06b6d4"}
+LABELS = {"baseline": "Linux CFS (Default)", "heuristic": "Heuristic Rules", "ai": "AI Strategist (SmolLM2)"}
 def format_state(features):
     return " | ".join(
         from huggingface_hub import hf_hub_download
         path = hf_hub_download(repo_id="Rayugacodes/kernelx-training-data", filename="test.jsonl", repo_type="dataset")
         DATA = [json.loads(l) for l in open(path) if l.strip()][:5000]
     except Exception:
         DATA = []
         for i in range(2000):
 load_data()
 # ---------------------------------------------------------------------------
+# OpenEnv Environment State (for API endpoints)
 # ---------------------------------------------------------------------------
class KernelXSimEnv:
    """OpenEnv-compliant environment running in simulation mode.

    Episodes replay consecutive records from the module-level ``DATA`` list,
    starting at a random index chosen by :meth:`reset`.

    Args:
        max_steps: Number of steps after which :meth:`step` reports
            ``done``. Defaults to 100, the previously hard-coded value.
    """

    def __init__(self, max_steps=100):
        self.max_steps = max_steps
        self.episode_id = str(uuid.uuid4())
        self.step_count = 0
        self.current_idx = 0
        self.prev_action = 0.0
        self.cumulative_reward = 0.0
        self.running = False

    def reset(self):
        """Start a new episode and return its first observation.

        Returns:
            A dict with ``observation``, ``features``, ``pid`` and
            ``episode_id``; or a dict with a single ``error`` key when no
            transition data is loaded (the environment then stays stopped).
        """
        if not DATA:
            self.running = False
            return {"error": "No transition data loaded."}
        self.episode_id = str(uuid.uuid4())
        self.step_count = 0
        # Leave room for a full episode when the dataset is large enough.
        # Clamp to 0 so a dataset shorter than one episode does not hand
        # random.randint an empty range (which raises ValueError).
        self.current_idx = random.randint(0, max(0, len(DATA) - self.max_steps))
        self.prev_action = 0.0
        self.cumulative_reward = 0.0
        self.running = True
        rec = DATA[self.current_idx]
        obs = rec["state"]
        return {
            "observation": obs,
            "features": dict(zip(FEATURE_NAMES, obs)),
            "pid": rec["pid"],
            "episode_id": self.episode_id,
        }

    def step(self, action_value=None):
        """Advance the episode by one recorded transition.

        Args:
            action_value: Action to apply; clamped to ``[-1.0, 1.0]``.
                When ``None`` the action is taken from ``ai_action``.

        Returns:
            A dict describing the resulting observation, reward and
            episode progress, or a dict with an ``error`` key when the
            environment has not been started with :meth:`reset`.
        """
        if not self.running:
            return {"error": "Environment not started. Call /reset first."}
        # Stay on the last record once the episode runs past the dataset end.
        rec = DATA[min(self.current_idx + self.step_count, len(DATA) - 1)]
        state = rec["state"]
        next_state_raw = rec["next_state"]
        if action_value is None:
            action_value = ai_action(state)
        action_value = max(-1.0, min(1.0, float(action_value)))
        ns = simulate_effect(state, next_state_raw, action_value)
        reward = compute_reward(state, ns, action_value, self.prev_action)
        self.step_count += 1
        self.prev_action = action_value
        self.cumulative_reward += reward
        return {
            "observation": ns,
            "features": dict(zip(FEATURE_NAMES, ns)),
            "action_taken": action_value,
            "reward": reward,
            "cumulative_reward": self.cumulative_reward,
            "step": self.step_count,
            "done": self.step_count >= self.max_steps,
            "pid": rec["pid"],
        }

    def state(self):
        """Return a snapshot of the episode id, step count, reward and run flag."""
        return {
            "episode_id": self.episode_id,
            "step_count": self.step_count,
            "cumulative_reward": self.cumulative_reward,
            "running": self.running,
        }

    def stop(self):
        """Mark the environment as stopped and return the episode totals."""
        self.running = False
        return {
            "episode_id": self.episode_id,
            "total_steps": self.step_count,
            "final_reward": self.cumulative_reward,
            "status": "stopped",
        }
+ENV = KernelXSimEnv()
 # ---------------------------------------------------------------------------
 # Charts
 CHART_LAYOUT = dict(
     template="plotly_dark",
+    paper_bgcolor="rgba(0,0,0,0)",
     plot_bgcolor="#1e293b",
+    font=dict(color="#e2e8f0", family="Inter, system-ui, sans-serif", size=12),
     margin=dict(l=50, r=20, t=50, b=40),
     legend=dict(bgcolor="rgba(0,0,0,0.3)", bordercolor="#334155"),
 )
 def make_cumulative_chart(results):
     fig = go.Figure()
     for k in ["baseline", "heuristic", "ai"]:
         fig.add_trace(go.Scatter(y=results[k]["cum_rewards"], name=LABELS[k], line=dict(color=COLORS[k], width=2.5)))
+    fig.update_layout(**CHART_LAYOUT, title="Cumulative Reward", xaxis_title="Step", yaxis_title="Reward", height=380)
     fig.add_hline(y=0, line_dash="dash", line_color="#475569", opacity=0.5)
     return fig
         if len(lat) >= window:
             smooth = np.convolve(lat, np.ones(window)/window, mode="valid")
             fig.add_trace(go.Scatter(y=smooth, name=LABELS[k], line=dict(color=COLORS[k], width=2.5)))
+    fig.update_layout(**CHART_LAYOUT, title="Rolling Avg Latency (lower = better)", xaxis_title="Step", yaxis_title="Wait (us)", height=380)
     return fig
 def make_action_chart(results):
     fig = make_subplots(rows=1, cols=3, subplot_titles=[LABELS[k] for k in ["baseline", "heuristic", "ai"]])
     for i, k in enumerate(["baseline", "heuristic", "ai"], 1):
         fig.add_trace(go.Histogram(x=results[k]["actions"], nbinsx=40, marker_color=COLORS[k], opacity=0.8, showlegend=False), row=1, col=i)
+    fig.update_layout(**CHART_LAYOUT, title="Action Distributions", height=280)
     fig.update_xaxes(range=[-1.1, 1.1])
     return fig
 def make_summary_bars(results):
+    names = [LABELS[k] for k in ["baseline", "heuristic", "ai"]]
+    cols = [COLORS[k] for k in ["baseline", "heuristic", "ai"]]
+    fig = make_subplots(rows=1, cols=3, subplot_titles=["Mean Reward", "Avg Latency (us)", "Positive %"])
+    r = [np.mean(results[k]["rewards"]) for k in ["baseline", "heuristic", "ai"]]
+    l = [np.mean(results[k]["latencies"]) for k in ["baseline", "heuristic", "ai"]]
+    p = [sum(1 for x in results[k]["rewards"] if x > 0)/len(results[k]["rewards"])*100 for k in ["baseline", "heuristic", "ai"]]
+    fig.add_trace(go.Bar(x=names, y=r, marker_color=cols, showlegend=False, text=[f"{v:.2f}" for v in r], textposition="outside"), row=1, col=1)
+    fig.add_trace(go.Bar(x=names, y=l, marker_color=cols, showlegend=False, text=[f"{v:.1f}" for v in l], textposition="outside"), row=1, col=2)
+    fig.add_trace(go.Bar(x=names, y=p, marker_color=cols, showlegend=False, text=[f"{v:.0f}%" for v in p], textposition="outside"), row=1, col=3)
+    fig.update_layout(**CHART_LAYOUT, height=320)
+    return fig
+# ---------------------------------------------------------------------------
+# Simulation engine
+# ---------------------------------------------------------------------------
def run_full_simulation(n_steps):
    """Run all three strategies over a random sample of recorded transitions.

    Args:
        n_steps: Requested number of transitions. Converted with ``int``
            and clamped to ``[0, len(DATA)]`` so an out-of-range request
            cannot make ``random.sample`` raise.

    Returns:
        A dict keyed by ``"baseline"``, ``"heuristic"`` and ``"ai"``. Each
        value holds parallel lists ``"rewards"``, ``"latencies"``,
        ``"actions"`` and ``"cum_rewards"``, one entry per sampled
        transition.
    """
    strategies = {"baseline": baseline_action, "heuristic": heuristic_action, "ai": ai_action}
    sample_size = max(0, min(int(n_steps), len(DATA)))
    recs = random.sample(DATA, sample_size)
    results = {
        k: {"rewards": [], "latencies": [], "actions": [], "cum_rewards": []}
        for k in strategies
    }
    # Each strategy carries its own previous action and running reward total.
    prevs = dict.fromkeys(strategies, 0.)
    totals = dict.fromkeys(strategies, 0)
    for rec in recs:
        s, ns_raw = rec["state"], rec["next_state"]
        for k, fn in strategies.items():
            a = fn(s)
            ns = simulate_effect(s, ns_raw, a)
            r = compute_reward(s, ns, a, prevs[k])
            totals[k] = totals[k] + r
            bucket = results[k]
            bucket["rewards"].append(r)
            bucket["latencies"].append(ns[IDX_WAIT_US])
            bucket["actions"].append(a)
            bucket["cum_rewards"].append(totals[k])
            prevs[k] = a
    return results
 # ---------------------------------------------------------------------------
 # Gradio handlers
 # ---------------------------------------------------------------------------
 def simulate(n_steps):
+    results = run_full_simulation(n_steps)
     base_r, heur_r, ai_r = np.mean(results["baseline"]["rewards"]), np.mean(results["heuristic"]["rewards"]), np.mean(results["ai"]["rewards"])
     base_l, ai_l = np.mean(results["baseline"]["latencies"]), np.mean(results["ai"]["latencies"])
     lat_imp = ((base_l - ai_l) / base_l * 100) if base_l > 0 else 0
     reward_imp = ((ai_r - base_r) / abs(base_r) * 100) if base_r != 0 else 0
+    md = f"""
 | | Linux CFS | Heuristic | **AI Strategist** |
 |---|---|---|---|
+| **Mean Reward** | {base_r:.4f} | {heur_r:.4f} | **{ai_r:.4f}** |
+| **Avg Latency** | {base_l:.1f}us | {np.mean(results['heuristic']['latencies']):.1f}us | **{ai_l:.1f}us** |
+| **Latency Reduction** | — | {((base_l - np.mean(results['heuristic']['latencies'])) / base_l * 100):.1f}% | **{lat_imp:.1f}%** |
+| **Reward vs Baseline** | — | {((heur_r - base_r) / abs(base_r) * 100):+.1f}% | **{reward_imp:+.1f}%** |
 """
+    return md, make_cumulative_chart(results), make_latency_chart(results), make_action_chart(results), make_summary_bars(results)
 def explore_state(idx):
     rec = DATA[int(idx) % len(DATA)]
     s, ns_raw = rec["state"], rec["next_state"]
     a_b, a_h, a_ai = baseline_action(s), heuristic_action(s), ai_action(s)
+    ns_b, ns_h, ns_ai = simulate_effect(s, ns_raw, a_b), simulate_effect(s, ns_raw, a_h), simulate_effect(s, ns_raw, a_ai)
+    r_b, r_h, r_ai = compute_reward(s, ns_b, a_b), compute_reward(s, ns_h, a_h), compute_reward(s, ns_ai, a_ai)
     wait = s[IDX_WAIT_US]
     lat_imp = ((ns_b[IDX_WAIT_US] - ns_ai[IDX_WAIT_US]) / ns_b[IDX_WAIT_US] * 100) if ns_b[IDX_WAIT_US] > 0 else 0
         elif a > 0.05: return "slight demote"
         return "HOLD"
+    if wait > 50: reason = f"Very high latency ({wait:.0f}us) — aggressive priority boost."
+    elif wait > 15: reason = f"Elevated latency ({wait:.0f}us) — boosting priority."
     elif wait < 3: reason = f"Very low latency ({wait:.0f}us) — system healthy, minimal adjustment."
+    else: reason = f"Normal latency ({wait:.0f}us) — near-neutral action."
     md = f"""
 **PID** {rec['pid']} | **CPU** {rec['cpu']} | **Wait** {wait:.0f}us | **CSW** {s[IDX_CTX_SWITCHES]:.0f}
 | Strategy | Action | Decision | Result Latency | Reward |
 |---|---|---|---|---|
 | Linux CFS | {a_b:+.4f} | {meaning(a_b)} | {ns_b[IDX_WAIT_US]:.1f}us | {r_b:+.4f} |
 | Heuristic | {a_h:+.4f} | {meaning(a_h)} | {ns_h[IDX_WAIT_US]:.1f}us | {r_h:+.4f} |
 | **AI Strategist** | **{a_ai:+.4f}** | **{meaning(a_ai)}** | **{ns_ai[IDX_WAIT_US]:.1f}us** | **{r_ai:+.4f}** |
+**Latency reduction: {lat_imp:.1f}%** vs baseline | *{reason}*
 """
     fig = go.Figure()
+    fig.add_trace(go.Bar(x=["Linux CFS", "Heuristic", "AI Strategist"], y=[a_b, a_h, a_ai],
                          marker_color=[COLORS["baseline"], COLORS["heuristic"], COLORS["ai"]],
                          text=[f"{a_b:+.2f}", f"{a_h:+.2f}", f"{a_ai:+.2f}"], textposition="outside"))
+    fig.update_layout(**CHART_LAYOUT, title="Action Comparison", yaxis_title="Action", height=260, yaxis_range=[-1.1, 0.5])
     fig.add_hline(y=0, line_dash="dash", line_color="#475569")
     return md, fig
+# OpenEnv API handlers for Gradio
def api_reset():
    """Reset the shared environment and return its response as indented JSON."""
    return json.dumps(ENV.reset(), indent=2)
def api_step(action_str):
    """Step the shared environment using the action typed into the UI.

    Args:
        action_str: Action text. ``None``, blank or unparsable input
            passes ``None`` to ``ENV.step``. Non-string values are
            converted with ``str`` before parsing.

    Returns:
        The step response serialised as indented JSON.
    """
    # The value may arrive as None or as a bare number rather than a string,
    # so normalise before calling .strip().
    text = "" if action_str is None else str(action_str).strip()
    try:
        action = float(text) if text else None
    except ValueError:
        # Unparsable text is treated the same as a blank field.
        action = None
    result = ENV.step(action)
    return json.dumps(result, indent=2)
def api_state():
    """Return the shared environment's current state as indented JSON."""
    snapshot = ENV.state()
    return json.dumps(snapshot, indent=2)
def api_stop():
    """Stop the shared environment and return its final summary as indented JSON."""
    summary = ENV.stop()
    return json.dumps(summary, indent=2)
 # ---------------------------------------------------------------------------
 # App
 # ---------------------------------------------------------------------------
 CSS = """
+.gradio-container { max-width: 100% !important; padding: 0 !important; }
+.main { max-width: 100% !important; }
+#component-0 { max-width: 100% !important; }
+footer { display: none !important; }
 .dark { background-color: #0f172a !important; }
+h1 { color: #06b6d4 !important; letter-spacing: -0.02em; }
 h2, h3 { color: #e2e8f0 !important; }
+.tab-nav button { font-size: 1.05em !important; padding: 12px 24px !important; }
+.tab-nav button.selected { border-bottom: 3px solid #06b6d4 !important; color: #06b6d4 !important; }
 """
 with gr.Blocks(title="KernelX — AI Kernel Scheduler", css=CSS, theme=gr.themes.Base(primary_hue="cyan", neutral_hue="slate")) as app:
     gr.Markdown("""
+<div style="text-align:center; padding: 10px 0;">
+<h1 style="font-size:2.5em; margin-bottom:0;">KernelX</h1>
+<p style="color:#94a3b8; font-size:1.15em; margin-top:4px;">
+AI-Powered Linux Kernel Scheduler &nbsp;|&nbsp; eBPF + SmolLM2-360M &nbsp;|&nbsp; 44ms Inference &nbsp;|&nbsp; 534K Real Transitions
+</p>
+</div>
     """)
+    # --- Tab 1: Simulation ---
+    with gr.Tab("Simulation"):
         with gr.Row():
+            n_slider = gr.Slider(50, 2000, value=500, step=50, label="Steps", scale=3)
             run_btn = gr.Button("Run Simulation", variant="primary", scale=1, size="lg")
         summary = gr.Markdown()
+        with gr.Row(equal_height=True):
             cumulative_plot = gr.Plot(label="Cumulative Reward")
+            latency_plot = gr.Plot(label="Latency")
+        with gr.Row(equal_height=True):
+            action_plot = gr.Plot(label="Actions")
+        summary_bars = gr.Plot(label="Summary")
+        run_btn.click(fn=simulate, inputs=[n_slider], outputs=[summary, cumulative_plot, latency_plot, action_plot, summary_bars])
+    # --- Tab 2: State Explorer ---
+    with gr.Tab("State Explorer"):
         with gr.Row():
+            idx_slider = gr.Slider(0, min(len(DATA)-1, 4999), value=0, step=1, label="Transition #", scale=3)
+            explore_btn = gr.Button("Analyze", variant="primary", scale=1)
+        with gr.Row():
+            with gr.Column(scale=2):
+                state_md = gr.Markdown()
+            with gr.Column(scale=1):
+                action_bar = gr.Plot(label="Actions")
+        explore_btn.click(fn=explore_state, inputs=[idx_slider], outputs=[state_md, action_bar])
+    # --- Tab 3: OpenEnv API ---
+    with gr.Tab("OpenEnv API"):
+        gr.Markdown("""
+### OpenEnv-Compliant Environment API
+KernelX implements the standard `reset()` → `step(action)` → `state` → `stop()` interface.
+Use these buttons to interact with the environment programmatically.
+        """)
         with gr.Row():
+            reset_btn = gr.Button("reset()", variant="primary")
+            step_input = gr.Textbox(label="Action [-1.0 to 1.0]", placeholder="Leave blank for AI auto-action", scale=2)
+            step_btn = gr.Button("step(action)", variant="primary")
         with gr.Row():
+            state_btn = gr.Button("state()")
+            stop_btn = gr.Button("stop()", variant="stop")
+        api_output = gr.Code(label="Response (JSON)", language="json", lines=15)
+        reset_btn.click(fn=api_reset, outputs=[api_output])
+        step_btn.click(fn=api_step, inputs=[step_input], outputs=[api_output])
+        state_btn.click(fn=api_state, outputs=[api_output])
+        stop_btn.click(fn=api_stop, outputs=[api_output])
+    # --- Tab 4: How RL Improves ---
+    with gr.Tab("How RL Improves"):
         gr.Markdown("""
+<div style="max-width:900px; margin: 0 auto;">
+## Policy Iteration Loop
 ```
  COLLECT                    TRAIN                     DEPLOY
 ┌──────────┐           ┌──────────────┐          ┌──────────────┐
+│ Run live  │  JSONL    │ SFT warm-    │  .gguf   │ Hot-swap     │
 │ kernel    │ ────────> │ start +      │ ───────> │ GGUF model   │ ──┐
+│ w/ policy │           │ GRPO RL      │          │ in brain     │   │
 └──────────┘           └──────────────┘          └──────────────┘   │
      ^                                                               │
+     └───────────────── REPEAT with improved policy ────────────────┘
 ```
+| Iter | Policy | Improvement |
+|:----:|--------|-------------|
+| 0 | Linux CFS Default | Baseline (no AI) |
+| 1 | SFT Warm-Start | Matches heuristic rules |
+| 2 | GRPO on Iter 1 | Discovers patterns humans missed |
+| 3+ | GRPO on Iter 2+ | Recursive self-improvement |
 ### Training Evidence
+| Metric | Before | After |
+|--------|--------|-------|
+| Loss | 2.05 | 0.28 |
+| Accuracy | 61% | 91% |
+| Compliance | 0% | 100% |
+| Inference | — | 44ms |
+| Size | 1.4GB | 258MB |
 ### Reward Function
+**R = α·log(Δexec + 1) − β·Δwait − γ·|a − a_prev|**
+| Component | Weight | Signal |
+|-----------|--------|--------|
+| Throughput | α=1.0 | CPU progress |
+| Latency | β=2.0 | Wait time penalty |
+| Stability | γ=0.5 | Jitter penalty |
+</div>
         """)
+    # --- Tab 5: Architecture ---
+    with gr.Tab("Architecture"):
         gr.Markdown("""
+<div style="max-width:900px; margin: 0 auto;">
+## System Architecture
 ```
+┌─────────────────────── KERNEL SPACE ───────────────────────┐
+│                                                             │
+│   sched_switch ──> eBPF Sentinel ──> 24D Feature Vector     │
+│        ↑                                    │               │
+│   priority_actions ←── BPF Ring Buffer ─────┘               │
+└────────│────────────────────│───────────────────────────────┘
+         │              ┌─────v──────────────┐
+         │              │    RUST BRIDGE     │
+         │              │  Ring Buffer → SHM │
+         │              │  Ring Buffer → JSONL│
+         │              │  ZMQ ← actions     │
+         │              └─────│──────────────┘
+         │              ┌─────v──────────────┐
          │              │   PYTHON BRAIN     │
          │              │   (OpenEnv)        │
          │              │                    │
+         │              │  SHM → 10D → LLM  │
+         │              │  Action [-1, 1]    │
+         │              │  → ZMQ → Bridge    │
          │              └────────────────────┘
+         └── Kernel applies nudge at next sched_switch
 ```
+| Component | Language | Latency |
+|-----------|---------|---------|
+| eBPF Sentinel | C | <1μs |
+| Rust Bridge | Rust | <1ms |
+| SmolLM2-360M | GGUF | 44ms |
+| TUI Dashboard | Rust | 100ms |
+</div>
         """)
     gr.Markdown("""
+<div style="text-align:center; padding:10px; color:#64748b; font-size:0.9em;">
+<a href="https://huggingface.co/Rayugacodes/kernelx-strategist">Model</a> ·
+<a href="https://huggingface.co/datasets/Rayugacodes/kernelx-training-data">Data</a> ·
+<a href="https://colab.research.google.com/github/pie-314/KernelX/blob/model-training-hugging-face-integration/KernelX_Training.ipynb">Colab</a> ·
+<a href="https://github.com/pie-314/KernelX">GitHub</a> ·
+Meta PyTorch OpenEnv Hackathon 2026
+</div>
     """)
 app.launch(server_name="0.0.0.0", server_port=7860)