Spaces:

anthonym21
/

world-model-demo

Sleeping

App Files Files Community

anthonym21 committed 4 days ago

Commit

88b4be3

verified ·

1 Parent(s): 38f1411

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +205 -521

app.py CHANGED Viewed

@@ -1,584 +1,268 @@
 """
-World Model Demo - Interactive Visualization
-A Hugging Face Space demonstrating the three phases of world model learning:
-1. Exploration (Motor Babbling) - Random exploration to learn physics
-2. Dreaming (Planning) - Using learned model to plan without acting
-3. Execution - Following the plan in reality
-Based on the concept that intelligent agents build internal models of their world.
 """
 import gradio as gr
 import random
-import time
-from collections import deque
-from typing import Optional
 import json
-# ==========================================
-# 1. THE ENVIRONMENT (Reality)
-# ==========================================
-class GridEnvironment:
-    """The ground truth physics engine."""
-    def __init__(self, size: int = 4, obstacles: set = None):
         self.size = size
-        self.agent_pos = (0, 0)
-        self.goal = (size - 1, size - 1)
-        self.obstacles = obstacles if obstacles else {(1, 1), (1, 2), (2, 2)}
     def reset(self):
-        self.agent_pos = (0, 0)
-        return self.agent_pos
-    def step(self, action: int):
-        """Execute action: 0=Up, 1=Down, 2=Left, 3=Right"""
-        x, y = self.agent_pos
-        if action == 0: y -= 1   # Up
-        elif action == 1: y += 1 # Down
-        elif action == 2: x -= 1 # Left
-        elif action == 3: x += 1 # Right
-        # Check boundaries and walls
-        if 0 <= x < self.size and 0 <= y < self.size and (x, y) not in self.obstacles:
-            self.agent_pos = (x, y)
-        return self.agent_pos
-# ==========================================
-# 2. THE WORLD MODEL (The Brain)
-# ==========================================
 class WorldModel:
-    """Internal simulation learned from experience."""
     def __init__(self):
-        self.transitions = {}
-    def learn(self, state, action, next_state):
-        self.transitions[(state, action)] = next_state
     def predict(self, state, action):
-        return self.transitions.get((state, action))
-    def get_learned_states(self):
-        """Return all states the model has learned about."""
-        states = set()
-        for (state, _), next_state in self.transitions.items():
-            states.add(state)
-            states.add(next_state)
-        return states
-# ==========================================
-# 3. THE AGENT (The Controller)
-# ==========================================
-class Agent:
-    """The intelligent agent with world model."""
-    def __init__(self):
-        self.model = WorldModel()
-        self.actions = [0, 1, 2, 3]
-        self.action_names = ["↑ Up", "↓ Down", "← Left", "→ Right"]
-        self.exploration_history = []
-    def explore_step(self, env):
-        """Single exploration step."""
-        state = env.agent_pos
-        action = random.choice(self.actions)
-        next_state = env.step(action)
-        self.model.learn(state, action, next_state)
-        return {
-            'state': state,
-            'action': action,
-            'action_name': self.action_names[action],
-            'next_state': next_state,
-            'bounced': state == next_state
-        }
-    def dream_and_plan(self, start, goal):
-        """BFS planning using only the world model."""
-        queue = deque([(start, [])])
-        visited = {start}
-        search_states = []  # Track states visited during planning
-        while queue:
-            curr_state, path = queue.popleft()
-            search_states.append(curr_state)
-            if curr_state == goal:
-                return path, search_states, True
-            for action in self.actions:
-                predicted_next = self.model.predict(curr_state, action)
-                if predicted_next is not None and predicted_next not in visited:
-                    visited.add(predicted_next)
-                    new_path = path + [action]
-                    queue.append((predicted_next, new_path))
-        return None, search_states, False
-# ==========================================
-# VISUALIZATION HELPERS
-# ==========================================
-def render_grid(env: GridEnvironment, agent_pos: tuple, highlight_cells: dict = None,
-                show_model_knowledge: set = None, plan_path: list = None) -> str:
-    """
-    Render the grid as HTML.
-    highlight_cells: dict mapping (x,y) -> color class
-    show_model_knowledge: set of states the model knows about
-    plan_path: list of (x,y) positions in the planned path
-    """
-    size = env.size
-    cell_size = 60
     html = f'''
-    <style>
-        .grid-container {{
-            display: inline-grid;
-            grid-template-columns: repeat({size}, {cell_size}px);
-            gap: 2px;
-            background: #1a1a2e;
-            padding: 10px;
-            border-radius: 12px;
-            box-shadow: 0 4px 20px rgba(0,0,0,0.3);
-        }}
-        .grid-cell {{
-            width: {cell_size}px;
-            height: {cell_size}px;
-            display: flex;
-            align-items: center;
-            justify-content: center;
-            font-size: 24px;
-            border-radius: 8px;
-            transition: all 0.3s ease;
-            position: relative;
-        }}
-        .cell-empty {{ background: #16213e; }}
-        .cell-agent {{ background: #4ecca3; animation: pulse 1s infinite; }}
-        .cell-goal {{ background: #ffd369; }}
-        .cell-obstacle {{ background: #e94560; }}
-        .cell-start {{ background: #7b68ee; }}
-        .cell-explored {{ background: #2d4263; border: 2px solid #4ecca3; }}
-        .cell-path {{ background: #00adb5; }}
-        .cell-search {{ background: #533483; border: 2px dashed #9d65c9; }}
-        .cell-agent-at-goal {{ background: linear-gradient(135deg, #4ecca3, #ffd369); }}
-        @keyframes pulse {{
-            0%, 100% {{ transform: scale(1); }}
-            50% {{ transform: scale(0.95); }}
-        }}
-        .coord-label {{
-            position: absolute;
-            bottom: 2px;
-            right: 4px;
-            font-size: 9px;
-            color: rgba(255,255,255,0.4);
-        }}
-    </style>
-    <div class="grid-container">
     '''
-    highlight_cells = highlight_cells or {}
-    plan_path_set = set(plan_path) if plan_path else set()
     for y in range(size):
         for x in range(size):
-            pos = (x, y)
-            cell_class = "cell-empty"
-            content = ""
-            # Layer the cell states (order matters)
-            if show_model_knowledge and pos in show_model_knowledge:
-                cell_class = "cell-explored"
-            if pos in plan_path_set and pos != env.goal:
-                cell_class = "cell-path"
-            if pos in highlight_cells:
-                cell_class = highlight_cells[pos]
-            if pos in env.obstacles:
-                cell_class = "cell-obstacle"
-                content = "🧱"
-            elif pos == (0, 0) and pos != agent_pos:
-                cell_class = "cell-start"
-                content = "🏁"
-            elif pos == env.goal and pos != agent_pos:
-                cell_class = "cell-goal"
-                content = "⭐"
-            if pos == agent_pos:
-                if pos == env.goal:
-                    cell_class = "cell-agent-at-goal"
-                    content = "🤖⭐"
-                else:
-                    cell_class = "cell-agent"
-                    content = "🤖"
-            html += f'<div class="grid-cell {cell_class}">{content}<span class="coord-label">{x},{y}</span></div>'
-    html += '</div>'
-    return html
-def create_stats_html(rules_learned: int, states_explored: int, plan_length: int = 0,
-                      phase: str = "Ready") -> str:
-    """Create HTML for statistics display."""
-    phase_colors = {
-        "Ready": "#888",
-        "Exploring": "#4ecca3",
-        "Dreaming": "#9d65c9",
-        "Executing": "#00adb5",
-        "Complete": "#ffd369"
-    }
-    color = phase_colors.get(phase, "#888")
-    return f'''
-    <div style="
-        background: linear-gradient(135deg, #1a1a2e, #16213e);
-        padding: 20px;
-        border-radius: 12px;
-        color: white;
-        font-family: 'Segoe UI', sans-serif;
-        display: grid;
-        grid-template-columns: repeat(2, 1fr);
-        gap: 15px;
-        max-width: 400px;
-    ">
-        <div style="text-align: center; padding: 10px; background: rgba(255,255,255,0.1); border-radius: 8px;">
-            <div style="font-size: 28px; font-weight: bold; color: #4ecca3;">{rules_learned}</div>
-            <div style="font-size: 12px; opacity: 0.8;">Physics Rules Learned</div>
-        </div>
-        <div style="text-align: center; padding: 10px; background: rgba(255,255,255,0.1); border-radius: 8px;">
-            <div style="font-size: 28px; font-weight: bold; color: #7b68ee;">{states_explored}</div>
-            <div style="font-size: 12px; opacity: 0.8;">States Explored</div>
-        </div>
-        <div style="text-align: center; padding: 10px; background: rgba(255,255,255,0.1); border-radius: 8px;">
-            <div style="font-size: 28px; font-weight: bold; color: #00adb5;">{plan_length}</div>
-            <div style="font-size: 12px; opacity: 0.8;">Plan Length</div>
-        </div>
-        <div style="text-align: center; padding: 10px; background: rgba(255,255,255,0.1); border-radius: 8px;">
-            <div style="font-size: 16px; font-weight: bold; color: {color};">● {phase}</div>
-            <div style="font-size: 12px; opacity: 0.8;">Current Phase</div>
         </div>
     </div>
     '''
-# ==========================================
-# GRADIO INTERFACE
-# ==========================================
-class WorldModelDemo:
-    """Main demo controller."""
-    def __init__(self):
-        self.reset()
-    def reset(self, grid_size: int = 4, obstacle_preset: str = "Default"):
-        """Reset the demo state."""
-        obstacles = self._get_obstacles(grid_size, obstacle_preset)
-        self.env = GridEnvironment(size=grid_size, obstacles=obstacles)
-        self.agent = Agent()
-        self.plan = None
-        self.plan_positions = []
-        self.search_states = []
-        self.current_step = 0
-        self.phase = "Ready"
-        self.log = []
-        return self._render_state()
-    def _get_obstacles(self, size: int, preset: str) -> set:
-        """Get obstacle configuration based on preset."""
-        if preset == "None":
-            return set()
-        elif preset == "Default":
-            if size == 4:
-                return {(1, 1), (1, 2), (2, 2)}
-            elif size == 5:
-                return {(1, 1), (1, 2), (2, 2), (3, 1)}
-            else:
-                return {(1, 1), (2, 2), (3, 3)}
-        elif preset == "Maze":
-            if size == 4:
-                return {(1, 0), (1, 1), (1, 2), (2, 2)}
-            elif size == 5:
-                return {(1, 0), (1, 1), (1, 2), (3, 2), (3, 3), (3, 4)}
-            else:
-                return {(1, 0), (1, 1), (2, 3), (2, 4), (4, 1), (4, 2)}
-        elif preset == "Scattered":
-            if size == 4:
-                return {(0, 2), (2, 0), (2, 3)}
-            elif size == 5:
-                return {(0, 2), (2, 0), (2, 3), (4, 1)}
-            else:
-                return {(0, 2), (2, 0), (2, 4), (4, 2), (5, 0)}
-        return set()
-    def _render_state(self, highlight: dict = None) -> tuple:
-        """Render current state as HTML outputs."""
-        known_states = self.agent.model.get_learned_states()
-        grid_html = render_grid(
-            self.env,
-            self.env.agent_pos,
-            highlight_cells=highlight,
-            show_model_knowledge=known_states if self.phase != "Ready" else None,
-            plan_path=self.plan_positions if self.plan_positions else None
-        )
-        stats_html = create_stats_html(
-            rules_learned=len(self.agent.model.transitions),
-            states_explored=len(known_states),
-            plan_length=len(self.plan) if self.plan else 0,
-            phase=self.phase
-        )
-        log_text = "\n".join(self.log[-20:])  # Last 20 log entries
-        return grid_html, stats_html, log_text
-    def explore(self, steps: int = 100) -> tuple:
-        """Run exploration phase."""
-        self.phase = "Exploring"
-        self.env.reset()
-        self.log.append(f"═══ PHASE 1: EXPLORATION ({steps} steps) ═══")
-        for i in range(steps):
-            result = self.agent.explore_step(self.env)
-            if i < 10 or i % 50 == 0:  # Log first 10 and every 50th
-                bounce_str = " (BOUNCE!)" if result['bounced'] else ""
-                self.log.append(f"Step {i+1}: {result['state']} → {result['action_name']} → {result['next_state']}{bounce_str}")
-            if self.env.agent_pos == self.env.goal:
-                self.env.reset()
-        self.log.append(f"✓ Learned {len(self.agent.model.transitions)} physics rules")
-        self.log.append(f"✓ Explored {len(self.agent.model.get_learned_states())} unique states")
-        return self._render_state()
-    def dream(self) -> tuple:
-        """Run planning phase without moving in real world."""
-        self.phase = "Dreaming"
-        self.env.reset()
-        self.log.append(f"═══ PHASE 2: DREAMING ═══")
-        self.log.append(f"Planning from (0,0) to {self.env.goal}...")
-        self.log.append("(No real-world movement - pure simulation!)")
-        start = (0, 0)
-        goal = self.env.goal
-        self.plan, self.search_states, success = self.agent.dream_and_plan(start, goal)
-        if success:
-            # Convert plan to position list
-            self.plan_positions = [start]
-            pos = start
-            for action in self.plan:
-                predicted = self.agent.model.predict(pos, action)
-                if predicted:
-                    self.plan_positions.append(predicted)
-                    pos = predicted
-            path_str = " → ".join([self.agent.action_names[a] for a in self.plan])
-            self.log.append(f"✓ Plan found! Length: {len(self.plan)}")
-            self.log.append(f"  Path: {path_str}")
-            self.log.append(f"  Positions: {' → '.join(str(p) for p in self.plan_positions)}")
-        else:
-            self.log.append("✗ No path found - need more exploration!")
-            self.plan = None
-            self.plan_positions = []
-        # Highlight searched states
-        highlight = {s: "cell-search" for s in self.search_states}
-        return self._render_state(highlight)
-    def execute(self) -> tuple:
-        """Execute the plan in reality."""
-        if not self.plan:
-            self.log.append("⚠ No plan to execute! Run 'Dream' first.")
-            return self._render_state()
-        self.phase = "Executing"
-        self.env.reset()
-        self.log.append(f"═══ PHASE 3: EXECUTION ═══")
-        self.log.append(f"Start: {self.env.agent_pos}")
-        for i, action in enumerate(self.plan):
-            state = self.env.step(action)
-            self.log.append(f"  {self.agent.action_names[action]} → {state}")
-        if self.env.agent_pos == self.env.goal:
-            self.phase = "Complete"
-            self.log.append("🎉 SUCCESS! Goal reached!")
-        else:
-            self.log.append("⚠ FAILURE: Plan didn't reach goal")
-        return self._render_state()
-    def run_full_demo(self, steps: int = 200) -> tuple:
-        """Run all three phases automatically."""
-        self.reset()
-        # Phase 1
-        self.explore(steps)
-        # Phase 2
-        self.dream()
-        # Phase 3
-        if self.plan:
-            self.execute()
-        return self._render_state()
-# Create global demo instance
-demo = WorldModelDemo()
-def reset_demo(grid_size, obstacle_preset):
-    return demo.reset(int(grid_size), obstacle_preset)
-def run_explore(steps):
-    return demo.explore(int(steps))
-def run_dream():
-    return demo.dream()
-def run_execute():
-    return demo.execute()
-def run_full(steps):
-    return demo.run_full_demo(int(steps))
-# ==========================================
-# GRADIO UI
-# ==========================================
-with gr.Blocks(
-    title="World Model Demo",
-    theme=gr.themes.Soft(
-        primary_hue="teal",
-        secondary_hue="purple",
-    ),
-    css="""
-        .main-title {
-            text-align: center;
-            margin-bottom: 10px;
-            background: linear-gradient(90deg, #4ecca3, #7b68ee);
-            -webkit-background-clip: text;
-            -webkit-text-fill-color: transparent;
-        }
-        .phase-btn { min-width: 120px; }
-        footer { display: none !important; }
-    """
-) as interface:
     gr.Markdown("""
     # 🧠 World Model Demo
-    ### How Intelligent Agents Learn to Dream and Plan
-    This interactive demo shows how an AI agent builds an internal model of its world through three phases:
-    | Phase | Description |
-    |-------|-------------|
-    | 🔍 **Exploration** | Random movement to discover physics rules ("motor babbling") |
-    | 💭 **Dreaming** | Planning a path using *only* the internal model (no real movement!) |
-    | 🚀 **Execution** | Following the imagined plan in the real world |
-    ---
     """)
     with gr.Row():
         with gr.Column(scale=2):
-            grid_display = gr.HTML(label="Grid World")
         with gr.Column(scale=1):
-            stats_display = gr.HTML(label="Statistics")
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("### ⚙️ Configuration")
             with gr.Row():
-                grid_size = gr.Dropdown(
-                    choices=["4", "5", "6"],
-                    value="4",
-                    label="Grid Size"
-                )
-                obstacle_preset = gr.Dropdown(
-                    choices=["None", "Default", "Maze", "Scattered"],
-                    value="Default",
-                    label="Obstacles"
-                )
-            exploration_steps = gr.Slider(
-                minimum=50, maximum=500, value=200, step=50,
-                label="Exploration Steps"
-            )
-            gr.Markdown("### 🎮 Controls")
             with gr.Row():
-                reset_btn = gr.Button("🔄 Reset", variant="secondary")
-                full_btn = gr.Button("▶️ Run All Phases", variant="primary")
-            gr.Markdown("### 📍 Step-by-Step")
-            with gr.Row():
-                explore_btn = gr.Button("1️⃣ Explore", elem_classes="phase-btn")
-                dream_btn = gr.Button("2️⃣ Dream", elem_classes="phase-btn")
-                execute_btn = gr.Button("3️⃣ Execute", elem_classes="phase-btn")
-        with gr.Column():
-            log_display = gr.Textbox(
-                label="📋 Activity Log",
-                lines=15,
-                max_lines=20,
-                interactive=False
-            )
-    gr.Markdown("""
-    ---
-    ### 📚 How It Works
-    **The Key Insight:** The agent's "brain" (World Model) is a simple dictionary that maps
-    `(state, action) → next_state`. During **Dreaming**, the agent searches through this
-    dictionary using BFS - it never calls the real environment!
-    **Why This Matters:** This is the foundation of how advanced AI systems (like MuZero,
-    Dreamer, and world models in robotics) learn to plan. Instead of trial-and-error in
-    reality (expensive, dangerous), they simulate futures in their head.
-    **Legend:**
-    - 🤖 Agent | ⭐ Goal | 🏁 Start | 🧱 Wall
-    - 🟢 Border = Explored states | 🟣 Dashed = States searched during planning | 🔵 = Planned path
-    ---
-    *Built with ❤️ using Gradio • Concept: World Models for Intelligent Agents*
-    """)
-    # Event handlers
-    outputs = [grid_display, stats_display, log_display]
-    reset_btn.click(reset_demo, inputs=[grid_size, obstacle_preset], outputs=outputs)
-    grid_size.change(reset_demo, inputs=[grid_size, obstacle_preset], outputs=outputs)
-    obstacle_preset.change(reset_demo, inputs=[grid_size, obstacle_preset], outputs=outputs)
-    explore_btn.click(run_explore, inputs=[exploration_steps], outputs=outputs)
-    dream_btn.click(run_dream, outputs=outputs)
-    execute_btn.click(run_execute, outputs=outputs)
-    full_btn.click(run_full, inputs=[exploration_steps], outputs=outputs)
-    # Initialize on load
-    interface.load(lambda: demo.reset(), outputs=outputs)
 if __name__ == "__main__":
-    interface.launch()

 """
+World Model Demo - Interactive AI Planning Visualization
+Educational demonstration of model-based reinforcement learning concepts
 """
 import gradio as gr
 import random
 import json
+# ============================================================================
+# World Model Core Classes
+# ============================================================================
+class GridWorld:
+    """Simple grid environment for world model demonstration"""
+    # Positions are [x, y] lists; obstacles are kept as a set of (x, y) tuples.
+    def __init__(self, size=8):
         self.size = size
+        # reset() is what creates agent_pos, goal_pos, obstacles and steps.
+        self.reset()
     def reset(self):
+        # Agent starts at [1, 1]; the goal sits one cell in from the far corner.
+        self.agent_pos = [1, 1]
+        self.goal_pos = [self.size - 2, self.size - 2]
+        # Obstacles are re-rolled on every reset, after the agent and goal
+        # positions are set so the generator can avoid them.
+        self.obstacles = self._generate_obstacles()
+        self.steps = 0
+        return self._get_state()
+    def _generate_obstacles(self):
+        # Draw random cells until self.size distinct obstacles exist, never
+        # covering the agent's or the goal's current cell.
+        # NOTE(review): nothing here checks that the goal stays reachable.
+        obstacles = set()
+        num_obstacles = self.size
+        while len(obstacles) < num_obstacles:
+            x, y = random.randint(0, self.size-1), random.randint(0, self.size-1)
+            if [x, y] != self.agent_pos and [x, y] != self.goal_pos:
+                obstacles.add((x, y))
+        return obstacles
+    def _get_state(self):
+        # Snapshot dict handed to the renderer and the model. 'agent' is a
+        # copy, while 'goal' is the same list object as self.goal_pos.
+        return {
+            'agent': self.agent_pos.copy(),
+            'goal': self.goal_pos,
+            'obstacles': list(self.obstacles),
+            'size': self.size,
+            'steps': self.steps
+        }
+    def step(self, action):
+        # Returns (state dict, reward, done). An unrecognised action maps to
+        # (0, 0): the agent stays put but the step is still counted.
+        dx, dy = {'up': (0, -1), 'down': (0, 1), 'left': (-1, 0), 'right': (1, 0)}.get(action, (0, 0))
+        # Clamp to the grid so a move into a border leaves that coordinate as is.
+        new_x = max(0, min(self.size - 1, self.agent_pos[0] + dx))
+        new_y = max(0, min(self.size - 1, self.agent_pos[1] + dy))
+        # Obstacles block the move; the agent keeps its previous position.
+        if (new_x, new_y) not in self.obstacles:
+            self.agent_pos = [new_x, new_y]
+        self.steps += 1
+        done = self.agent_pos == self.goal_pos
+        # +10 on reaching the goal, otherwise a small per-step penalty.
+        reward = 10 if done else -0.1
+        return self._get_state(), reward, done
 class WorldModel:
+    """Simple world model that learns to predict state transitions"""
     def __init__(self):
+        # Maps (agent position tuple, action) -> most recently observed next
+        # position tuple. Despite the name, no counts are stored.
+        self.transition_counts = {}
+        # Starts at 0.5 and is replaced by correct/total on the first learn().
+        self.prediction_accuracy = 0.5
+        self.total_predictions = 0
+        self.correct_predictions = 0
     def predict(self, state, action):
+        """Predict next state given current state and action"""
+        # Returns (predicted [x, y] list, confidence float).
+        agent = tuple(state['agent'])
+        key = (agent, action)
+        if key in self.transition_counts:
+            # Seen before: replay the remembered outcome. Confidence is
+            # 0.5 + 0.5 * running accuracy, capped at 0.95.
+            predicted = list(self.transition_counts[key])
+            confidence = min(0.95, 0.5 + self.correct_predictions / max(1, self.total_predictions) * 0.5)
+        else:
+            # Unseen: assume the move simply succeeds. No bounds or obstacle
+            # check is applied, so the guess may lie outside the grid.
+            dx, dy = {'up': (0, -1), 'down': (0, 1), 'left': (-1, 0), 'right': (1, 0)}.get(action, (0, 0))
+            predicted = [agent[0] + dx, agent[1] + dy]
+            confidence = 0.3
+        return predicted, confidence
+    def learn(self, state, action, next_state):
+        """Learn from observed transition"""
+        agent = tuple(state['agent'])
+        next_agent = tuple(next_state['agent'])
+        key = (agent, action)
+        # Score the model's own prediction before the new observation is
+        # stored, so the accuracy reflects what it believed beforehand.
+        predicted, _ = self.predict(state, action)
+        self.total_predictions += 1
+        if tuple(predicted) == next_agent:
+            self.correct_predictions += 1
+        # Overwrite any earlier entry with the latest observed outcome.
+        self.transition_counts[key] = next_agent
+        self.prediction_accuracy = self.correct_predictions / max(1, self.total_predictions)
+# ============================================================================
+# Visualization
+# ============================================================================
+def render_grid_html(state, prediction=None, phase="observe"):
+    """Render the grid as an HTML table"""
+    # state: dict providing 'size', 'agent', 'goal' and 'obstacles' (read below).
+    # prediction: optional [x, y] cell to outline in the phase colour.
+    # phase: label shown above the grid; also selects the accent colour.
+    size = state['size']
+    agent = state['agent']
+    goal = state['goal']
+    # Normalise obstacles to tuples so the per-cell membership test works
+    # whether they arrive as lists or tuples.
+    obstacles = set(map(tuple, state['obstacles']))
+    # Accent colour per phase; unknown phases fall back to grey.
+    colors = {
+        'observe': '#3b82f6',
+        'predict': '#8b5cf6',
+        'plan': '#f59e0b',
+        'act': '#22c55e',
+        'learn': '#ec4899'
+    }
+    phase_color = colors.get(phase, '#6b7280')
     html = f'''
+    <div style="text-align: center; font-family: system-ui, sans-serif;">
+        <div style="display: inline-block; background: #1e293b; padding: 20px; border-radius: 12px; box-shadow: 0 4px 6px rgba(0,0,0,0.3);">
+            <div style="margin-bottom: 10px; color: {phase_color}; font-weight: bold; font-size: 18px;">
+                Phase: {phase.upper()}
+            </div>
+            <table style="border-collapse: collapse; margin: auto;">
     '''
+    # One <tr> per y, one <td> per x, so x runs across and y runs down.
     for y in range(size):
+        html += '<tr>'
         for x in range(size):
+            bg = '#334155'
+            content = ''
+            border = '1px solid #475569'
+            # Precedence is obstacle, then goal, then agent: a cell shows
+            # only one icon even if several coincide.
+            if (x, y) in obstacles:
+                bg = '#7f1d1d'
+                content = '🧱'
+            elif [x, y] == goal:
+                bg = '#166534'
+                content = '⭐'
+            elif [x, y] == agent:
+                bg = '#1d4ed8'
+                content = '🤖'
+            # Outline the predicted cell, if one was supplied.
+            if prediction and [x, y] == prediction:
+                border = f'3px solid {phase_color}'
+            html += f'''
+                <td style="width: 45px; height: 45px; background: {bg};
+                    border: {border}; text-align: center; font-size: 20px;">
+                    {content}
+                </td>
+            '''
+        html += '</tr>'
+    html += '''
+            </table>
+            <div style="margin-top: 15px; color: #94a3b8; font-size: 14px;">
+                🤖 Agent | ⭐ Goal | 🧱 Obstacle
+            </div>
         </div>
     </div>
     '''
+    return html
+# ============================================================================
+# Gradio Interface
+# ============================================================================
+# Module-level demo state, read and rebound by the handlers below.
+# NOTE(review): these are plain globals, so presumably every visitor of the
+# Space shares one world and one model - confirm against Gradio's session model.
+world = GridWorld()
+model = WorldModel()
+current_state = world.reset()
+current_phase = "observe"
+def get_display():
+    # Returns (grid HTML, stats line) for the current global state.
+    # No prediction is passed to the renderer, so no cell is outlined.
+    global current_state, current_phase
+    html = render_grid_html(current_state, phase=current_phase)
+    stats = f"Steps: {current_state['steps']} | Model Accuracy: {model.prediction_accuracy:.1%}"
+    return html, stats
+def do_action(action):
+    # Runs one predict -> act -> learn cycle for `action` and returns
+    # (grid HTML, stats line, status message).
+    # NOTE(review): current_phase is set to "predict", "act" and "learn" along
+    # the way, but both branches below set it back to "observe" before
+    # get_display() runs, so the rendered phase is always OBSERVE.
+    global current_state, current_phase, world, model
+    current_phase = "predict"
+    # `prediction` is computed but only `confidence` is used afterwards.
+    prediction, confidence = model.predict(current_state, action)
+    current_phase = "act"
+    # Shallow copy of the state dict, kept as the "before" side for learn().
+    old_state = current_state.copy()
+    current_state, reward, done = world.step(action)
+    current_phase = "learn"
+    model.learn(old_state, action, current_state)
+    if done:
+        current_phase = "observe"
+        # world.reset() re-rolls the obstacles; the model object is kept, so
+        # transitions learned on the previous layout remain stored.
+        current_state = world.reset()
+        message = "🎉 Goal reached! Environment reset."
+    else:
+        current_phase = "observe"
+        message = f"Moved {action}. Prediction confidence: {confidence:.1%}"
+    html, stats = get_display()
+    return html, stats, message
+def reset_env():
+    # Manual reset: new environment layout AND a fresh, untrained model.
+    # Returns (grid HTML, stats line, status message).
+    global current_state, current_phase, world, model
+    current_state = world.reset()
+    model = WorldModel()
+    current_phase = "observe"
+    html, stats = get_display()
+    return html, stats, "Environment reset!"
+# Build the interface
+with gr.Blocks(title="World Model Demo", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     # 🧠 World Model Demo
+    Interactive demonstration of how AI agents build internal models of the world.
+    **The Learning Cycle:**
+    1. **Observe** - Agent perceives current state
+    2. **Predict** - World model predicts action outcomes
+    3. **Plan** - Agent evaluates possible futures
+    4. **Act** - Execute chosen action
+    5. **Learn** - Update model from observed outcome
     """)
     with gr.Row():
         with gr.Column(scale=2):
+            # Wide column: rendered grid plus two read-only text fields.
+            grid_display = gr.HTML(label="Environment")
+            stats_display = gr.Textbox(label="Statistics", interactive=False)
+            message_display = gr.Textbox(label="Status", interactive=False)
         with gr.Column(scale=1):
+            gr.Markdown("### Controls")
             with gr.Row():
+                # Unlabelled buttons with no-op handlers on either side of
+                # "Up" - presumably spacers to centre it over "Down".
+                gr.Button("").click(lambda: None)
+                up_btn = gr.Button("⬆️ Up")
+                gr.Button("").click(lambda: None)
             with gr.Row():
+                left_btn = gr.Button("⬅️ Left")
+                down_btn = gr.Button("⬇️ Down")
+                right_btn = gr.Button("➡️ Right")
+            reset_btn = gr.Button("🔄 Reset", variant="secondary")
+            gr.Markdown("""
+            ### About World Models
+            World models are internal representations that AI agents use to:
+            - Simulate possible futures
+            - Plan without trial-and-error
+            - Learn efficiently from experience
+            Used in: MuZero, Dreamer, PlaNet
+            """)
+    # Connect buttons
+    # Each handler returns (grid HTML, stats line, status message), matching
+    # the three output components in order.
+    up_btn.click(lambda: do_action("up"), outputs=[grid_display, stats_display, message_display])
+    down_btn.click(lambda: do_action("down"), outputs=[grid_display, stats_display, message_display])
+    left_btn.click(lambda: do_action("left"), outputs=[grid_display, stats_display, message_display])
+    right_btn.click(lambda: do_action("right"), outputs=[grid_display, stats_display, message_display])
+    reset_btn.click(reset_env, outputs=[grid_display, stats_display, message_display])
+    # Initial display
+    # get_display() yields two values, so only the grid and stats are filled
+    # on page load; the status box is not among the outputs here.
+    demo.load(get_display, outputs=[grid_display, stats_display])
 if __name__ == "__main__":
+    demo.launch()