Spaces:

anthonym21
/

world-model-demo

Sleeping

App Files Community

anthonym21 committed on Jan 12

Commit

5bcb831

verified ·

1 Parent(s): e345b60

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +273 -164

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ Educational demonstration of model-based reinforcement learning concepts
 import gradio as gr
 import random
-import json
 # ============================================================================
 # World Model Core Classes
@@ -14,24 +14,28 @@ import json
 class GridWorld:
     """Simple grid environment for world model demonstration"""
-    def __init__(self, size=8):
         self.size = size
         self.reset()
     def reset(self):
-        self.agent_pos = [1, 1]
-        self.goal_pos = [self.size - 2, self.size - 2]
         self.obstacles = self._generate_obstacles()
         self.steps = 0
         return self._get_state()
     def _generate_obstacles(self):
         obstacles = set()
-        num_obstacles = self.size
-        while len(obstacles) < num_obstacles:
             x, y = random.randint(0, self.size-1), random.randint(0, self.size-1)
             if [x, y] != self.agent_pos and [x, y] != self.goal_pos:
-                obstacles.add((x, y))
         return obstacles
     def _get_state(self):
@@ -44,7 +48,8 @@ class GridWorld:
         }
     def step(self, action):
-        dx, dy = {'up': (0, -1), 'down': (0, 1), 'left': (-1, 0), 'right': (1, 0)}.get(action, (0, 0))
         new_x = max(0, min(self.size - 1, self.agent_pos[0] + dx))
         new_y = max(0, min(self.size - 1, self.agent_pos[1] + dy))
@@ -53,86 +58,136 @@ class GridWorld:
         self.steps += 1
         done = self.agent_pos == self.goal_pos
-        reward = 10 if done else -0.1
-        return self._get_state(), reward, done
-class WorldModel:
-    """Simple world model that learns to predict state transitions"""
     def __init__(self):
-        self.transition_counts = {}
-        self.prediction_accuracy = 0.5
-        self.total_predictions = 0
-        self.correct_predictions = 0
-    def predict(self, state, action):
-        """Predict next state given current state and action"""
-        agent = tuple(state['agent'])
-        key = (agent, action)
-        if key in self.transition_counts:
-            predicted = list(self.transition_counts[key])
-            confidence = min(0.95, 0.5 + self.correct_predictions / max(1, self.total_predictions) * 0.5)
-        else:
-            dx, dy = {'up': (0, -1), 'down': (0, 1), 'left': (-1, 0), 'right': (1, 0)}.get(action, (0, 0))
-            predicted = [agent[0] + dx, agent[1] + dy]
-            confidence = 0.3
-        return predicted, confidence
-    def learn(self, state, action, next_state):
-        """Learn from observed transition"""
-        agent = tuple(state['agent'])
-        next_agent = tuple(next_state['agent'])
-        key = (agent, action)
-        predicted, _ = self.predict(state, action)
-        self.total_predictions += 1
-        if tuple(predicted) == next_agent:
-            self.correct_predictions += 1
-        self.transition_counts[key] = next_agent
-        self.prediction_accuracy = self.correct_predictions / max(1, self.total_predictions)
 # ============================================================================
 # Visualization
 # ============================================================================
-def render_grid_html(state, phase="observe", prediction=None):
-    """Render the grid as HTML with phase-appropriate styling"""
     agent = state['agent']
     goal = state['goal']
     obstacles = set(tuple(o) if isinstance(o, list) else o for o in state['obstacles'])
     size = state['size']
-    phase_colors = {
-        'observe': '#3b82f6',  # blue
-        'predict': '#f59e0b',  # amber
-        'plan': '#8b5cf6',     # purple
-        'act': '#10b981',      # green
-        'learn': '#ec4899'     # pink
     }
-    phase_color = phase_colors.get(phase, '#6b7280')
     html = f'''
     <div style="text-align: center; font-family: system-ui, sans-serif;">
-        <div style="display: inline-block; background: #1e293b; padding: 20px; border-radius: 12px; box-shadow: 0 4px 6px rgba(0,0,0,0.3);">
-            <div style="margin-bottom: 10px; color: {phase_color}; font-weight: bold; font-size: 18px;">
-                Phase: {phase.upper()}
             </div>
-            <table style="border-collapse: collapse; margin: auto;">
     '''
     for y in range(size):
         html += '<tr>'
         for x in range(size):
             bg = '#334155'
             content = ''
-            border = '1px solid #475569'
             if (x, y) in obstacles:
-                bg = '#7f1d1d'
                 content = '🧱'
             elif [x, y] == goal:
                 bg = '#166534'
@@ -140,13 +195,16 @@ def render_grid_html(state, phase="observe", prediction=None):
             elif [x, y] == agent:
                 bg = '#1d4ed8'
                 content = '🤖'
-            if prediction and [x, y] == prediction:
-                border = f'3px solid {phase_color}'
             html += f'''
-                <td style="width: 45px; height: 45px; background: {bg};
-                    border: {border}; text-align: center; font-size: 20px;">
                     {content}
                 </td>
             '''
@@ -154,8 +212,51 @@ def render_grid_html(state, phase="observe", prediction=None):
     html += '''
             </table>
-            <div style="margin-top: 15px; color: #94a3b8; font-size: 14px;">
-                🤖 Agent | ⭐ Goal | 🧱 Obstacle
             </div>
         </div>
     </div>
@@ -163,147 +264,155 @@ def render_grid_html(state, phase="observe", prediction=None):
     return html
 # ============================================================================
-# Gradio Interface
 # ============================================================================
-world = GridWorld()
-model = WorldModel()
 current_state = world.reset()
-current_phase = "observe"
-def get_display():
-    global current_state, current_phase
-    html = render_grid_html(current_state, phase=current_phase)
-    stats = f"Steps: {current_state['steps']} | Model Accuracy: {model.prediction_accuracy:.1%}"
-    return html, stats
-def do_action(action):
-    global current_state, current_phase, world, model
-    current_phase = "predict"
-    prediction, confidence = model.predict(current_state, action)
-    current_phase = "act"
-    old_state = current_state.copy()
-    current_state, reward, done = world.step(action)
-    current_phase = "learn"
-    model.learn(old_state, action, current_state)
     if done:
-        current_phase = "observe"
-        current_state = world.reset()
-        message = "🎉 Goal reached! Environment reset."
     else:
-        current_phase = "observe"
-        message = f"Moved {action}. Prediction confidence: {confidence:.1%}"
-    html, stats = get_display()
-    return html, stats, message
-def reset_env():
-    global current_state, current_phase, world, model
-    world = GridWorld()  # Create fresh world
-    model = WorldModel()  # Create fresh model
-    current_state = world.reset()
-    current_phase = "observe"
-    html, stats = get_display()
-    return html, stats, "Environment reset!"
-# Build the interface
 with gr.Blocks(title="World Model Demo", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     # 🧠 World Model Demo
-    **What is this?** An interactive demonstration of how AI agents can build internal "mental models"
-    of the world to plan and reason, rather than just reacting to inputs.
     """)
     with gr.Row():
-        with gr.Column(scale=2):
-            grid_display = gr.HTML(label="Environment")
-            stats_display = gr.Textbox(label="Statistics", interactive=False)
-            message_display = gr.Textbox(label="Status", interactive=False)
-        with gr.Column(scale=1):
-            gr.Markdown("### Controls")
-            with gr.Row():
-                gr.Button("", visible=False, min_width=1)
-                up_btn = gr.Button("⬆️ Up")
-                gr.Button("", visible=False, min_width=1)
-            with gr.Row():
-                left_btn = gr.Button("⬅️ Left")
-                down_btn = gr.Button("⬇️ Down")
-                right_btn = gr.Button("➡️ Right")
             reset_btn = gr.Button("🔄 Reset", variant="secondary")
-            gr.Markdown("""
-            ---
-            **The Learning Cycle:**
-            1. 🔍 **Observe** - Perceive state
-            2. 💭 **Predict** - Imagine outcomes
-            3. ⚡ **Act** - Execute action
-            4. 📚 **Learn** - Update model
-            """)
-    # Educational content in collapsible sections
-    with gr.Accordion("📖 What is a World Model?", open=False):
-        gr.Markdown("""
-        A **world model** is an internal representation that an AI agent uses to *simulate* the
-        environment without actually interacting with it. Think of it as the agent's "imagination."
-        **Instead of pure trial-and-error, an agent with a world model can:**
-        - 🎯 **Imagine** possible futures ("what if I do X?")
-        - ⚖️ **Evaluate** which imagined future looks best
-        - 🗺️ **Plan** a sequence of actions to reach that future
-        - ✅ **Act** with confidence, having already "seen" the outcome
-        **Real examples:** MuZero (mastered Go/Chess without knowing rules), Dreamer (robot control),
-        IRIS (Atari from pixels)
-        """)
-    with gr.Accordion("🤔 How is this different from ChatGPT/Claude?", open=False):
         gr.Markdown("""
         | Aspect | Language Model (GPT, Claude) | World Model (This Demo) |
         |--------|------------------------------|-------------------------|
-        | **Predicts** | Next *word* in a sequence | Next *state* given an action |
-        | **Training** | Text prediction | Reward from environment |
         | **"Thinking"** | Generates plausible text | Simulates physical outcomes |
         | **Planning** | Implicit (chain-of-thought) | Explicit (tree search) |
-        | **Grounding** | Statistical text patterns | Causal dynamics |
-        **Example:**
-        - **LLM**: "If I push a ball off a table..." → generates plausible *text*
-        - **World Model**: state(ball on table) + action(push) → predicts actual *trajectory*
-        Language models learn *what sounds right*. World models learn *what actually happens*.
         """)
     with gr.Accordion("🔬 Why does this matter for AI Safety?", open=False):
         gr.Markdown("""
-        World models are crucial for AI safety research because:
-        - **Predictability**: Agents that plan can be analyzed - we can inspect what futures they're considering
-        - **Corrigibility**: Planning agents can incorporate "avoid irreversible actions" into their search
-        - **Interpretability**: The model's predictions can be examined for accuracy and bias
-        - **Scalable Oversight**: Humans can audit the agent's "reasoning" by inspecting simulated futures
-        Understanding how AI systems model the world helps us build systems we can trust and verify.
-        ---
-        *Created by [Anthony Maio](https://huggingface.co/anthonym21) as an educational resource*
         """)
     # Connect buttons
-    up_btn.click(lambda: do_action("up"), outputs=[grid_display, stats_display, message_display])
-    down_btn.click(lambda: do_action("down"), outputs=[grid_display, stats_display, message_display])
-    left_btn.click(lambda: do_action("left"), outputs=[grid_display, stats_display, message_display])
-    right_btn.click(lambda: do_action("right"), outputs=[grid_display, stats_display, message_display])
-    reset_btn.click(reset_env, outputs=[grid_display, stats_display, message_display])
-    # Initial display
-    demo.load(get_display, outputs=[grid_display, stats_display])
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 import random
+import time
 # ============================================================================
 # World Model Core Classes
 class GridWorld:
     """Simple grid environment for world model demonstration"""
+    def __init__(self, size=6):
         self.size = size
         self.reset()
     def reset(self):
+        self.agent_pos = [0, 0]
+        self.goal_pos = [self.size - 1, self.size - 1]
         self.obstacles = self._generate_obstacles()
         self.steps = 0
         return self._get_state()
     def _generate_obstacles(self):
         obstacles = set()
+        num_obstacles = self.size - 1
+        attempts = 0
+        while len(obstacles) < num_obstacles and attempts < 100:
             x, y = random.randint(0, self.size-1), random.randint(0, self.size-1)
             if [x, y] != self.agent_pos and [x, y] != self.goal_pos:
+                # Don't block the only path
+                if not (x == 0 and y == 1) and not (x == 1 and y == 0):
+                    obstacles.add((x, y))
+            attempts += 1
         return obstacles
     def _get_state(self):
         }
     def step(self, action):
+        moves = {'up': (0, -1), 'down': (0, 1), 'left': (-1, 0), 'right': (1, 0)}
+        dx, dy = moves.get(action, (0, 0))
         new_x = max(0, min(self.size - 1, self.agent_pos[0] + dx))
         new_y = max(0, min(self.size - 1, self.agent_pos[1] + dy))
         self.steps += 1
         done = self.agent_pos == self.goal_pos
+        return self._get_state(), done
+    def copy(self):
+        new_world = GridWorld(self.size)
+        new_world.agent_pos = self.agent_pos.copy()
+        new_world.goal_pos = self.goal_pos.copy()
+        new_world.obstacles = self.obstacles.copy()
+        new_world.steps = self.steps
+        return new_world
+class WorldModelAgent:
+    """Agent that uses a world model to plan ahead"""
     def __init__(self):
+        self.imagination_steps = []
+        self.best_path = []
+        self.action_values = {}
+    def imagine_action(self, world, action):
+        """Use world model to predict outcome without actually taking action"""
+        imagined_world = world.copy()
+        imagined_state, done = imagined_world.step(action)
+        return imagined_state, done, imagined_world
+    def evaluate_position(self, pos, goal):
+        """Simple heuristic: negative manhattan distance to goal"""
+        return -(abs(pos[0] - goal[0]) + abs(pos[1] - goal[1]))
+    def plan(self, world, depth=3):
+        """
+        Plan ahead by imagining future states.
+        This is what makes world models special - we can "think" before acting.
+        """
+        self.imagination_steps = []
+        self.action_values = {}
+        actions = ['up', 'down', 'left', 'right']
+        for action in actions:
+            # Imagine taking this action
+            imagined_state, done, imagined_world = self.imagine_action(world, action)
+            # Record what we imagined
+            self.imagination_steps.append({
+                'action': action,
+                'predicted_pos': imagined_state['agent'].copy(),
+                'depth': 1
+            })
+            if done:
+                # Found goal!
+                self.action_values[action] = 100
+                continue
+            # Look deeper - imagine further into the future
+            value = self.evaluate_position(imagined_state['agent'], imagined_state['goal'])
+            # Plan 2 steps ahead
+            best_future_value = -999
+            for next_action in actions:
+                future_state, future_done, _ = self.imagine_action(imagined_world, next_action)
+                self.imagination_steps.append({
+                    'action': f"{action}→{next_action}",
+                    'predicted_pos': future_state['agent'].copy(),
+                    'depth': 2
+                })
+                if future_done:
+                    best_future_value = 100
+                    break
+                future_value = self.evaluate_position(future_state['agent'], future_state['goal'])
+                best_future_value = max(best_future_value, future_value)
+            self.action_values[action] = value + 0.9 * best_future_value
+        # Return best action
+        best_action = max(self.action_values, key=self.action_values.get)
+        return best_action, self.action_values, self.imagination_steps
 # ============================================================================
 # Visualization
 # ============================================================================
+def render_grid(state, phase="observe", imagined_positions=None, highlight_action=None):
+    """Render the grid as HTML"""
     agent = state['agent']
     goal = state['goal']
     obstacles = set(tuple(o) if isinstance(o, list) else o for o in state['obstacles'])
     size = state['size']
+    phase_info = {
+        'observe': ('🔍 OBSERVE', '#3b82f6', 'Perceiving current state...'),
+        'imagine': ('💭 IMAGINE', '#f59e0b', 'Simulating possible futures...'),
+        'evaluate': ('⚖️ EVALUATE', '#8b5cf6', 'Scoring each path...'),
+        'act': ('⚡ ACT', '#10b981', 'Executing best action!'),
     }
+    phase_name, phase_color, phase_desc = phase_info.get(phase, ('', '#6b7280', ''))
     html = f'''
     <div style="text-align: center; font-family: system-ui, sans-serif;">
+        <div style="display: inline-block; background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%);
+                    padding: 24px; border-radius: 16px; box-shadow: 0 8px 32px rgba(0,0,0,0.4);">
+            <div style="margin-bottom: 8px; color: {phase_color}; font-weight: bold; font-size: 22px;
+                        text-shadow: 0 0 20px {phase_color}40;">
+                {phase_name}
+            </div>
+            <div style="margin-bottom: 16px; color: #94a3b8; font-size: 14px;">
+                {phase_desc}
             </div>
+            <table style="border-collapse: collapse; margin: auto; border-radius: 8px; overflow: hidden;">
     '''
+    # Convert imagined positions to set for easy lookup
+    imagined_set = set()
+    if imagined_positions:
+        for pos in imagined_positions:
+            imagined_set.add(tuple(pos))
     for y in range(size):
         html += '<tr>'
         for x in range(size):
             bg = '#334155'
             content = ''
+            border = '2px solid #475569'
+            opacity = '1'
             if (x, y) in obstacles:
+                bg = '#991b1b'
                 content = '🧱'
             elif [x, y] == goal:
                 bg = '#166534'
             elif [x, y] == agent:
                 bg = '#1d4ed8'
                 content = '🤖'
+            elif (x, y) in imagined_set:
+                # Show imagined positions as ghost agents
+                bg = '#475569'
+                content = '👻'
+                border = f'2px dashed {phase_color}'
             html += f'''
+                <td style="width: 50px; height: 50px; background: {bg};
+                    border: {border}; text-align: center; font-size: 24px;
+                    transition: all 0.3s ease;">
                     {content}
                 </td>
             '''
     html += '''
             </table>
+            <div style="margin-top: 16px; color: #64748b; font-size: 13px;">
+                🤖 Agent | ⭐ Goal | 🧱 Wall | 👻 Imagined Position
+            </div>
+        </div>
+    </div>
+    '''
+    return html
+def render_thinking(action_values, imagination_steps, best_action):
+    """Render the agent's thinking process"""
+    if not action_values:
+        return "<div style='color: #64748b; text-align: center; padding: 20px;'>Click 'Think & Move' to see the agent plan!</div>"
+    html = '''
+    <div style="font-family: system-ui, sans-serif; padding: 16px; background: #1e293b; border-radius: 12px;">
+        <h3 style="color: #f59e0b; margin-top: 0;">🧠 Agent's Reasoning</h3>
+        <p style="color: #94a3b8; font-size: 14px;">The agent imagined taking each action and predicted the outcomes:</p>
+        <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 12px; margin-top: 12px;">
+    '''
+    action_symbols = {'up': '⬆️', 'down': '⬇️', 'left': '⬅️', 'right': '➡️'}
+    for action, value in sorted(action_values.items(), key=lambda x: -x[1]):
+        is_best = action == best_action
+        border_color = '#10b981' if is_best else '#475569'
+        bg = '#064e3b' if is_best else '#334155'
+        label = ' ✓ BEST' if is_best else ''
+        html += f'''
+        <div style="background: {bg}; border: 2px solid {border_color}; border-radius: 8px; padding: 12px; text-align: center;">
+            <div style="font-size: 24px;">{action_symbols.get(action, '?')}</div>
+            <div style="color: #e2e8f0; font-weight: bold; margin-top: 4px;">{action.upper()}{label}</div>
+            <div style="color: #94a3b8; font-size: 13px;">Score: {value:.1f}</div>
+        </div>
+        '''
+    html += '''
+        </div>
+        <div style="margin-top: 16px; padding: 12px; background: #0f172a; border-radius: 8px;">
+            <div style="color: #10b981; font-weight: bold;">💡 Why this works:</div>
+            <div style="color: #94a3b8; font-size: 13px; margin-top: 8px;">
+                The agent <b>imagined</b> each possible action, <b>predicted</b> where it would end up,
+                and <b>evaluated</b> how close that gets to the goal. It can even imagine 2 steps ahead!
+                <br><br>
+                This is different from trial-and-error learning — the agent "thinks" before acting.
             </div>
         </div>
     </div>
     return html
 # ============================================================================
+# Global State
 # ============================================================================
+world = GridWorld(6)
+agent = WorldModelAgent()
 current_state = world.reset()
+def reset_game():
+    global world, agent, current_state
+    world = GridWorld(6)
+    agent = WorldModelAgent()
+    current_state = world.reset()
+    grid_html = render_grid(current_state, phase="observe")
+    thinking_html = "<div style='color: #64748b; text-align: center; padding: 20px;'>Click <b>'Think & Move'</b> to watch the agent plan!</div>"
+    status = "🔄 New environment! Click 'Think & Move' to see the world model in action."
+    return grid_html, thinking_html, status
+def think_and_move():
+    """Main function: Agent thinks using world model, then acts"""
+    global current_state, world, agent
+    # Check if already at goal
+    if current_state['agent'] == current_state['goal']:
+        return reset_game()
+    # Phase 1: Observe (already done - we have current_state)
+    # Phase 2: Imagine & Evaluate - Plan using world model
+    best_action, action_values, imagination_steps = agent.plan(world)
+    # Get imagined positions for visualization
+    imagined_positions = [step['predicted_pos'] for step in imagination_steps if step['depth'] == 1]
+    # Show imagination phase
+    grid_html = render_grid(current_state, phase="imagine", imagined_positions=imagined_positions)
+    thinking_html = render_thinking(action_values, imagination_steps, best_action)
+    # Phase 3: Act - Execute the best action
+    current_state, done = world.step(best_action)
+    # Update grid to show result
+    grid_html = render_grid(current_state, phase="act" if not done else "observe")
     if done:
+        status = f"🎉 Goal reached in {current_state['steps']} steps! Click 'Reset' for a new puzzle."
     else:
+        status = f"Step {current_state['steps']}: Chose {best_action.upper()} (score: {action_values[best_action]:.1f})"
+    return grid_html, thinking_html, status
+def manual_move(action):
+    """Let user move manually to compare with agent"""
+    global current_state, world
+    if current_state['agent'] == current_state['goal']:
+        return reset_game()
+    current_state, done = world.step(action)
+    grid_html = render_grid(current_state, phase="observe")
+    thinking_html = "<div style='color: #64748b; text-align: center; padding: 20px;'>You moved manually. Click 'Think & Move' to see how the agent would plan!</div>"
+    if done:
+        status = f"🎉 You reached the goal in {current_state['steps']} steps!"
+    else:
+        status = f"You moved {action}. Steps: {current_state['steps']}"
+    return grid_html, thinking_html, status
+# ============================================================================
+# Gradio Interface
+# ============================================================================
 with gr.Blocks(title="World Model Demo", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     # 🧠 World Model Demo
+    **Watch an AI agent "think" before it acts!**
+    Unlike reactive AI that just responds to inputs, this agent uses a **world model** to:
+    1. **Imagine** what would happen if it took each action
+    2. **Evaluate** which imagined future is best
+    3. **Act** based on its mental simulation
+    👉 **Click "Think & Move"** to watch the agent plan its path to the ⭐ goal!
     """)
     with gr.Row():
+        with gr.Column(scale=3):
+            grid_display = gr.HTML()
+            status_display = gr.Textbox(label="Status", interactive=False)
+        with gr.Column(scale=2):
+            thinking_display = gr.HTML()
+            gr.Markdown("### 🎮 Controls")
+            think_btn = gr.Button("🧠 Think & Move", variant="primary", size="lg")
             reset_btn = gr.Button("🔄 Reset", variant="secondary")
+            gr.Markdown("---")
+            gr.Markdown("**Manual controls** (to compare with agent):")
+            with gr.Row():
+                up_btn = gr.Button("⬆️")
+            with gr.Row():
+                left_btn = gr.Button("⬅️")
+                down_btn = gr.Button("⬇️")
+                right_btn = gr.Button("➡️")
+    with gr.Accordion("📖 What makes this different from ChatGPT/Claude?", open=False):
         gr.Markdown("""
         | Aspect | Language Model (GPT, Claude) | World Model (This Demo) |
         |--------|------------------------------|-------------------------|
+        | **Predicts** | Next *word* in text | Next *state* given action |
         | **"Thinking"** | Generates plausible text | Simulates physical outcomes |
         | **Planning** | Implicit (chain-of-thought) | Explicit (tree search) |
+        **The key insight:** This agent can "imagine" taking actions and see the results
+        *before* committing to them in the real world. It's like planning your route
+        on a map before driving.
+        **Real examples:** MuZero (mastered Chess/Go without knowing rules),
+        Dreamer (robot control), IRIS (Atari games)
         """)
     with gr.Accordion("🔬 Why does this matter for AI Safety?", open=False):
         gr.Markdown("""
+        World models are important for AI safety because:
+        - **Predictability**: We can inspect what futures the agent is considering
+        - **Interpretability**: The agent's "reasoning" is explicit, not hidden
+        - **Control**: We can verify the agent isn't planning harmful actions
+        - **Corrigibility**: Planning agents can incorporate "avoid irreversible actions"
+        Understanding how AI systems model the world helps us build systems we can trust.
         """)
     # Connect buttons
+    think_btn.click(think_and_move, outputs=[grid_display, thinking_display, status_display])
+    reset_btn.click(reset_game, outputs=[grid_display, thinking_display, status_display])
+    up_btn.click(lambda: manual_move("up"), outputs=[grid_display, thinking_display, status_display])
+    down_btn.click(lambda: manual_move("down"), outputs=[grid_display, thinking_display, status_display])
+    left_btn.click(lambda: manual_move("left"), outputs=[grid_display, thinking_display, status_display])
+    right_btn.click(lambda: manual_move("right"), outputs=[grid_display, thinking_display, status_display])
+    # Initialize
+    demo.load(reset_game, outputs=[grid_display, thinking_display, status_display])
 if __name__ == "__main__":
     demo.launch()