Spaces:

kai2ser
/

warehouse-gridworld

Sleeping

App Files Files Community

kai2ser committed on Apr 27

Commit

c91185e

verified ·

1 Parent(s): 4ef69fc

Initial upload: Warehouse GridWorld Gradio app

Browse files

Files changed (4) hide show

README.md +37 -6
__pycache__/app.cpython-314.pyc +0 -0
app.py +383 -0
requirements.txt +3 -0

README.md CHANGED Viewed

@@ -1,12 +1,43 @@
 ---
-title: Warehouse Gridworld
-emoji: 🏢
-colorFrom: purple
-colorTo: red
 sdk: gradio
-sdk_version: 6.13.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Warehouse GridWorld
+emoji: 📦
+colorFrom: blue
+colorTo: green
 sdk: gradio
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
+license: mit
 ---
+# Warehouse GridWorld
+A small Gradio + Gymnasium maze-navigation game. Move the red agent from the
+blue **S** start cell to the green **G** goal cell, avoiding dark **X** obstacles.
+## Controls
+- Arrow keys (or on-screen buttons) move the agent up / right / down / left.
+- Reset randomizes start, goal, and obstacles (~20% density), and guarantees a
+  solvable maze via BFS.
+- Grid size slider rebuilds the environment at sizes 3–25.
+## Reward shaping
+| Event | Reward |
+|---|---|
+| Move into wall / obstacle / out-of-bounds | −5.0 |
+| Step closer to goal (Manhattan) | +1.0 |
+| Step farther from goal | −0.5 |
+| Same Manhattan distance | −0.1 |
+| First time visiting a cell | +0.3 |
+| Reach the goal | +50.0 |
+| Hit step limit (100 steps) | −10.0 |
+## Gymnasium env
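+- `observation_space`: `Box([0,0,0,0], [1,1,1,1])` — `[agent_x, agent_y, goal_x, goal_y]`, each normalized to `[0, 1]` by dividing by `grid_size - 1` (here `x` is the row index and `y` the column index).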
+- `action_space`: `Discrete(4)` — `0=UP, 1=RIGHT, 2=DOWN, 3=LEFT`.
+## Local run
+```bash
+pip install -r requirements.txt
+python app.py
+```

__pycache__/app.cpython-314.pyc ADDED Viewed

Binary file (20.3 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,383 @@

+"""Warehouse GridWorld - Gradio + Gymnasium navigation game.
+Run:
+    pip install -r requirements.txt
+    python app.py
+"""
+from __future__ import annotations
+from collections import deque
+import gradio as gr
+import gymnasium as gym
+import numpy as np
+from gymnasium import spaces
+# ---------- Constants ----------
+DEFAULT_GRID_SIZE = 9
+MAX_STEPS = 100
+OBSTACLE_DENSITY = 0.20
+UP, RIGHT, DOWN, LEFT = 0, 1, 2, 3
+ACTION_NAMES = {0: "UP", 1: "RIGHT", 2: "DOWN", 3: "LEFT"}
+ACTION_DELTAS = {
+    UP: (-1, 0),
+    RIGHT: (0, 1),
+    DOWN: (1, 0),
+    LEFT: (0, -1),
+}
+# ---------- Environment ----------
+class WarehouseEnv(gym.Env):
+    """Gymnasium env for a randomized warehouse grid.
+    Observation: [agent_x_norm, agent_y_norm, goal_x_norm, goal_y_norm]
+    Action: 0=UP, 1=RIGHT, 2=DOWN, 3=LEFT
+    """
+    metadata = {"render_modes": ["html"]}
+    def __init__(self, grid_size: int = DEFAULT_GRID_SIZE, max_steps: int = MAX_STEPS):
+        super().__init__()
+        self.grid_size = int(grid_size)
+        self.max_steps = int(max_steps)
+        self.action_space = spaces.Discrete(4)
+        self.observation_space = spaces.Box(
+            low=0.0, high=1.0, shape=(4,), dtype=np.float32
+        )
+        self.grid: np.ndarray | None = None
+        self.agent_pos: tuple[int, int] = (0, 0)
+        self.start_pos: tuple[int, int] = (0, 0)
+        self.goal_pos: tuple[int, int] = (0, 0)
+        self.steps = 0
+        self.total_score = 0.0
+        self.last_reward = 0.0
+        self.last_action: int | None = None
+        self.last_rule = "New episode started. Agent begins on S."
+        self.visited: set[tuple[int, int]] = set()
+        self.terminated = False
+        self.truncated = False
+    # --- generation ---
+    def _is_solvable(self, grid: np.ndarray, start: tuple[int, int], goal: tuple[int, int]) -> bool:
+        n = self.grid_size
+        if grid[start] == 1 or grid[goal] == 1:
+            return False
+        seen = {start}
+        q = deque([start])
+        while q:
+            r, c = q.popleft()
+            if (r, c) == goal:
+                return True
+            for dr, dc in ((-1, 0), (1, 0), (0, -1), (0, 1)):
+                nr, nc = r + dr, c + dc
+                if 0 <= nr < n and 0 <= nc < n and grid[nr, nc] == 0 and (nr, nc) not in seen:
+                    seen.add((nr, nc))
+                    q.append((nr, nc))
+        return False
+    def _generate_grid(self):
+        n = self.grid_size
+        rng = self.np_random
+        for _ in range(300):
+            start = (int(rng.integers(0, n)), int(rng.integers(0, n)))
+            goal = (int(rng.integers(0, n)), int(rng.integers(0, n)))
+            if start == goal:
+                continue
+            grid = (rng.random((n, n)) < OBSTACLE_DENSITY).astype(np.int8)
+            grid[start] = 0
+            grid[goal] = 0
+            if self._is_solvable(grid, start, goal):
+                return grid, start, goal
+        # Safe fallback: empty grid corner-to-corner
+        return (
+            np.zeros((n, n), dtype=np.int8),
+            (0, 0),
+            (n - 1, n - 1),
+        )
+    # --- helpers ---
+    def _get_obs(self) -> np.ndarray:
+        denom = max(self.grid_size - 1, 1)
+        ax, ay = self.agent_pos
+        gx, gy = self.goal_pos
+        return np.array(
+            [ax / denom, ay / denom, gx / denom, gy / denom], dtype=np.float32
+        )
+    @staticmethod
+    def _manhattan(a: tuple[int, int], b: tuple[int, int]) -> int:
+        return abs(a[0] - b[0]) + abs(a[1] - b[1])
+    # --- gym API ---
+    def reset(self, seed: int | None = None, options: dict | None = None):
+        super().reset(seed=seed)
+        self.grid, self.start_pos, self.goal_pos = self._generate_grid()
+        self.agent_pos = self.start_pos
+        self.steps = 0
+        self.total_score = 0.0
+        self.last_reward = 0.0
+        self.last_action = None
+        self.last_rule = "New episode started. Agent begins on S."
+        self.visited = {self.start_pos}
+        self.terminated = False
+        self.truncated = False
+        return self._get_obs(), {}
+    def step(self, action: int):
+        if self.terminated or self.truncated:
+            return self._get_obs(), 0.0, self.terminated, self.truncated, {}
+        action = int(action)
+        self.steps += 1
+        self.last_action = action
+        dr, dc = ACTION_DELTAS[action]
+        nr, nc = self.agent_pos[0] + dr, self.agent_pos[1] + dc
+        n = self.grid_size
+        old_dist = self._manhattan(self.agent_pos, self.goal_pos)
+        reward = 0.0
+        rule_parts: list[str] = []
+        out_of_bounds = not (0 <= nr < n and 0 <= nc < n)
+        is_obstacle = (not out_of_bounds) and self.grid[nr, nc] == 1
+        if out_of_bounds or is_obstacle:
+            reward += -5.0
+            rule_parts.append(
+                "Invalid move: " + ("out of bounds" if out_of_bounds else "obstacle")
+                + " (-5.0)"
+            )
+        else:
+            self.agent_pos = (nr, nc)
+            new_dist = self._manhattan(self.agent_pos, self.goal_pos)
+            if new_dist < old_dist:
+                reward += 1.0
+                rule_parts.append("Closer to goal (+1.0)")
+            elif new_dist > old_dist:
+                reward += -0.5
+                rule_parts.append("Farther from goal (-0.5)")
+            else:
+                reward += -0.1
+                rule_parts.append("Same Manhattan distance (-0.1)")
+            if self.agent_pos not in self.visited:
+                reward += 0.3
+                rule_parts.append("New cell (+0.3)")
+                self.visited.add(self.agent_pos)
+            if self.agent_pos == self.goal_pos:
+                reward += 50.0
+                rule_parts.append("GOAL reached (+50.0)")
+                self.terminated = True
+        if not self.terminated and self.steps >= self.max_steps:
+            reward += -10.0
+            rule_parts.append("Step limit timeout (-10.0)")
+            self.truncated = True
+        self.last_reward = reward
+        self.total_score += reward
+        self.last_rule = "; ".join(rule_parts) + "."
+        return self._get_obs(), reward, self.terminated, self.truncated, {}
+# ---------- Rendering ----------
+def render_grid_html(env: WarehouseEnv) -> str:
+    n = env.grid_size
+    cell_size = max(26, min(56, 520 // n))
+    dot = int(cell_size * 0.6)
+    css = f"""
+    <style>
+      .wh-wrap {{ display: inline-block; }}
+      .wh-grid {{
+        display: grid;
+        grid-template-columns: repeat({n}, {cell_size}px);
+        grid-template-rows: repeat({n}, {cell_size}px);
+        gap: 1px;
+        background: #333;
+        padding: 1px;
+        border: 2px solid #222;
+        width: fit-content;
+      }}
+      .wh-cell {{
+        width: {cell_size}px;
+        height: {cell_size}px;
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
+        font-weight: 700;
+        font-size: {int(cell_size * 0.42)}px;
+      }}
+      .wh-empty    {{ background: #f3f3f3; color: #cfcfcf; }}
+      .wh-obstacle {{ background: #2b3a55; color: #2b3a55; }}
+      .wh-start    {{ background: #79b6ff; color: #003f8a; }}
+      .wh-goal     {{ background: #6ee08a; color: #0a5022; }}
+      .wh-dot {{
+        width: {dot}px;
+        height: {dot}px;
+        border-radius: 50%;
+        background: #e63946;
+        border: 2px solid #7a1018;
+        box-shadow: 0 0 4px rgba(0,0,0,0.35);
+      }}
+    </style>
+    """
+    cells: list[str] = []
+    for r in range(n):
+        for c in range(n):
+            pos = (r, c)
+            if env.grid[r, c] == 1:
+                cls, label = "wh-obstacle", "X"
+            elif pos == env.start_pos:
+                cls, label = "wh-start", "S"
+            elif pos == env.goal_pos:
+                cls, label = "wh-goal", "G"
+            else:
+                cls, label = "wh-empty", "."
+            inner = '<div class="wh-dot"></div>' if pos == env.agent_pos else label
+            cells.append(f'<div class="wh-cell {cls}">{inner}</div>')
+    return css + f'<div class="wh-wrap"><div class="wh-grid">{"".join(cells)}</div></div>'
+def render_scoreboard_md(env: WarehouseEnv) -> str:
+    if env.terminated:
+        status = "🏁 Goal reached!"
+    elif env.truncated:
+        status = "⏱️ Timed out"
+    else:
+        status = "🎮 Playing"
+    last_action = (
+        ACTION_NAMES[env.last_action] if env.last_action is not None else "None"
+    )
+    dist = WarehouseEnv._manhattan(env.agent_pos, env.goal_pos)
+    return f"""### Score Board
+| Field | Value |
+|---|---|
+| **Total Score** | `{env.total_score:+.2f}` |
+| **Last Reward** | `{env.last_reward:+.2f}` |
+| **Steps** | `{env.steps} / {env.max_steps}` |
+| **Agent Position** | `({env.agent_pos[0]}, {env.agent_pos[1]})` |
+| **Goal Position** | `({env.goal_pos[0]}, {env.goal_pos[1]})` |
+| **Manhattan Distance** | `{dist}` |
+| **Status** | {status} |
+| **Last Action** | `{last_action}` |
+| **Rule Fired** | {env.last_rule} |
+"""
+# ---------- Gradio app ----------
+# Client-side JavaScript handed to ``demo.load(js=...)``. It installs a single
+# document-level ``keydown`` listener (guarded by ``window.__wh_kb_bound`` so it
+# is bound only once) that maps the arrow keys to a click on the button whose
+# element id is ``wh-btn-up`` / ``-right`` / ``-down`` / ``-left``. Key presses
+# whose target is an INPUT, TEXTAREA or SELECT element are ignored.
+KEYBOARD_JS = """
+() => {
+  if (window.__wh_kb_bound) return;
+  window.__wh_kb_bound = true;
+  document.addEventListener('keydown', (e) => {
+    const tag = (e.target && e.target.tagName) || '';
+    if (tag === 'INPUT' || tag === 'TEXTAREA' || tag === 'SELECT') return;
+    const map = {
+      'ArrowUp':    'wh-btn-up',
+      'ArrowRight': 'wh-btn-right',
+      'ArrowDown':  'wh-btn-down',
+      'ArrowLeft':  'wh-btn-left',
+    };
+    const id = map[e.key];
+    if (!id) return;
+    e.preventDefault();
+    const wrapper = document.getElementById(id);
+    if (!wrapper) return;
+    const btn = wrapper.querySelector('button') || wrapper;
+    btn.click();
+  });
+}
+"""
+def build_app() -> gr.Blocks:
+    initial_env = WarehouseEnv()
+    initial_env.reset(seed=42)
+    with gr.Blocks(title="Warehouse GridWorld") as demo:
+        gr.Markdown(
+            "# 📦 Warehouse GridWorld Game\n"
+            "Use the **arrow keys** (or buttons) to move the red agent from **S** to **G**. "
+            f"Obstacles re-randomize at **{int(OBSTACLE_DENSITY * 100)}%** density on every reset."
+        )
+        env_state = gr.State(initial_env)
+        with gr.Row():
+            with gr.Column(scale=3):
+                grid_html = gr.HTML(render_grid_html(initial_env))
+                with gr.Row():
+                    up_btn = gr.Button("↑ Up", elem_id="wh-btn-up")
+                with gr.Row():
+                    left_btn = gr.Button("← Left", elem_id="wh-btn-left")
+                    down_btn = gr.Button("↓ Down", elem_id="wh-btn-down")
+                    right_btn = gr.Button("→ Right", elem_id="wh-btn-right")
+            with gr.Column(scale=2):
+                grid_size_slider = gr.Slider(
+                    minimum=3,
+                    maximum=25,
+                    value=DEFAULT_GRID_SIZE,
+                    step=1,
+                    label="Grid Size (resets on change)",
+                )
+                steps_progress = gr.Slider(
+                    minimum=0,
+                    maximum=MAX_STEPS,
+                    value=0,
+                    step=1,
+                    label=f"Steps (0 / {MAX_STEPS})",
+                    interactive=False,
+                )
+                reset_btn = gr.Button(
+                    "🔁 Reset / Randomize Grid", variant="primary"
+                )
+                scoreboard = gr.Markdown(render_scoreboard_md(initial_env))
+        outputs = [env_state, grid_html, scoreboard, steps_progress]
+        def do_step(state: WarehouseEnv, action: int):
+            state.step(action)
+            return state, render_grid_html(state), render_scoreboard_md(state), state.steps
+        def do_reset(state: WarehouseEnv, new_size: float):
+            new_size = int(new_size)
+            if state is None or new_size != state.grid_size:
+                state = WarehouseEnv(grid_size=new_size)
+            state.reset()
+            return state, render_grid_html(state), render_scoreboard_md(state), state.steps
+        up_btn.click(lambda s: do_step(s, UP), inputs=env_state, outputs=outputs)
+        right_btn.click(lambda s: do_step(s, RIGHT), inputs=env_state, outputs=outputs)
+        down_btn.click(lambda s: do_step(s, DOWN), inputs=env_state, outputs=outputs)
+        left_btn.click(lambda s: do_step(s, LEFT), inputs=env_state, outputs=outputs)
+        reset_btn.click(do_reset, inputs=[env_state, grid_size_slider], outputs=outputs)
+        grid_size_slider.release(
+            do_reset, inputs=[env_state, grid_size_slider], outputs=outputs
+        )
+        demo.load(fn=None, inputs=None, outputs=None, js=KEYBOARD_JS)
+    return demo
+if __name__ == "__main__":
+    app = build_app()
+    app.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio>=4.44.0
+gymnasium>=0.29.1
+numpy>=1.26.0