kai2ser committed on
Commit
c91185e
·
verified ·
1 Parent(s): 4ef69fc

Initial upload: Warehouse GridWorld Gradio app

Browse files
Files changed (4) hide show
  1. README.md +37 -6
  2. __pycache__/app.cpython-314.pyc +0 -0
  3. app.py +383 -0
  4. requirements.txt +3 -0
README.md CHANGED
@@ -1,12 +1,43 @@
1
  ---
2
- title: Warehouse Gridworld
3
- emoji: 🏢
4
- colorFrom: purple
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 6.13.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Warehouse GridWorld
3
+ emoji: 📦
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ # Warehouse GridWorld
14
+
15
+ A small Gradio + Gymnasium maze-navigation game. Move the red agent from the
16
+ blue **S** start cell to the green **G** goal cell, avoiding dark **X** obstacles.
17
+
18
+ ## Controls
19
+ - Arrow keys (or on-screen buttons) move the agent up / right / down / left.
20
+ - Reset randomizes start, goal, and obstacles (~20% density), and guarantees a
21
+ solvable maze via BFS.
22
+ - Grid size slider rebuilds the environment at sizes 3–25.
23
+
24
+ ## Reward shaping
25
+ | Event | Reward |
26
+ |---|---|
27
+ | Move into wall / obstacle / out-of-bounds | −5.0 |
28
+ | Step closer to goal (Manhattan) | +1.0 |
29
+ | Step farther from goal | −0.5 |
30
+ | Same Manhattan distance | −0.1 |
31
+ | First time visiting a cell | +0.3 |
32
+ | Reach the goal | +50.0 |
33
+ | Hit step limit (100 steps) | −10.0 |
34
+
35
+ ## Gymnasium env
36
+ - `observation_space`: `Box([0,0,0,0], [1,1,1,1])` — `[agent_x, agent_y, goal_x, goal_y]` normalized.
37
+ - `action_space`: `Discrete(4)` — `0=UP, 1=RIGHT, 2=DOWN, 3=LEFT`.
38
+
39
+ ## Local run
40
+ ```bash
41
+ pip install -r requirements.txt
42
+ python app.py
43
+ ```
__pycache__/app.cpython-314.pyc ADDED
Binary file (20.3 kB). View file
 
app.py ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Warehouse GridWorld - Gradio + Gymnasium navigation game.
2
+
3
+ Run:
4
+ pip install -r requirements.txt
5
+ python app.py
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections import deque
11
+
12
+ import gradio as gr
13
+ import gymnasium as gym
14
+ import numpy as np
15
+ from gymnasium import spaces
16
+
17
+ # ---------- Constants ----------
18
+
19
+ DEFAULT_GRID_SIZE = 9
20
+ MAX_STEPS = 100
21
+ OBSTACLE_DENSITY = 0.20
22
+
23
+ UP, RIGHT, DOWN, LEFT = 0, 1, 2, 3
24
+ ACTION_NAMES = {0: "UP", 1: "RIGHT", 2: "DOWN", 3: "LEFT"}
25
+ ACTION_DELTAS = {
26
+ UP: (-1, 0),
27
+ RIGHT: (0, 1),
28
+ DOWN: (1, 0),
29
+ LEFT: (0, -1),
30
+ }
31
+
32
+
33
+ # ---------- Environment ----------
34
+
35
+
36
+ class WarehouseEnv(gym.Env):
37
+ """Gymnasium env for a randomized warehouse grid.
38
+
39
+ Observation: [agent_x_norm, agent_y_norm, goal_x_norm, goal_y_norm]
40
+ Action: 0=UP, 1=RIGHT, 2=DOWN, 3=LEFT
41
+ """
42
+
43
+ metadata = {"render_modes": ["html"]}
44
+
45
+ def __init__(self, grid_size: int = DEFAULT_GRID_SIZE, max_steps: int = MAX_STEPS):
46
+ super().__init__()
47
+ self.grid_size = int(grid_size)
48
+ self.max_steps = int(max_steps)
49
+ self.action_space = spaces.Discrete(4)
50
+ self.observation_space = spaces.Box(
51
+ low=0.0, high=1.0, shape=(4,), dtype=np.float32
52
+ )
53
+
54
+ self.grid: np.ndarray | None = None
55
+ self.agent_pos: tuple[int, int] = (0, 0)
56
+ self.start_pos: tuple[int, int] = (0, 0)
57
+ self.goal_pos: tuple[int, int] = (0, 0)
58
+ self.steps = 0
59
+ self.total_score = 0.0
60
+ self.last_reward = 0.0
61
+ self.last_action: int | None = None
62
+ self.last_rule = "New episode started. Agent begins on S."
63
+ self.visited: set[tuple[int, int]] = set()
64
+ self.terminated = False
65
+ self.truncated = False
66
+
67
+ # --- generation ---
68
+
69
+ def _is_solvable(self, grid: np.ndarray, start: tuple[int, int], goal: tuple[int, int]) -> bool:
70
+ n = self.grid_size
71
+ if grid[start] == 1 or grid[goal] == 1:
72
+ return False
73
+ seen = {start}
74
+ q = deque([start])
75
+ while q:
76
+ r, c = q.popleft()
77
+ if (r, c) == goal:
78
+ return True
79
+ for dr, dc in ((-1, 0), (1, 0), (0, -1), (0, 1)):
80
+ nr, nc = r + dr, c + dc
81
+ if 0 <= nr < n and 0 <= nc < n and grid[nr, nc] == 0 and (nr, nc) not in seen:
82
+ seen.add((nr, nc))
83
+ q.append((nr, nc))
84
+ return False
85
+
86
+ def _generate_grid(self):
87
+ n = self.grid_size
88
+ rng = self.np_random
89
+ for _ in range(300):
90
+ start = (int(rng.integers(0, n)), int(rng.integers(0, n)))
91
+ goal = (int(rng.integers(0, n)), int(rng.integers(0, n)))
92
+ if start == goal:
93
+ continue
94
+ grid = (rng.random((n, n)) < OBSTACLE_DENSITY).astype(np.int8)
95
+ grid[start] = 0
96
+ grid[goal] = 0
97
+ if self._is_solvable(grid, start, goal):
98
+ return grid, start, goal
99
+ # Safe fallback: empty grid corner-to-corner
100
+ return (
101
+ np.zeros((n, n), dtype=np.int8),
102
+ (0, 0),
103
+ (n - 1, n - 1),
104
+ )
105
+
106
+ # --- helpers ---
107
+
108
+ def _get_obs(self) -> np.ndarray:
109
+ denom = max(self.grid_size - 1, 1)
110
+ ax, ay = self.agent_pos
111
+ gx, gy = self.goal_pos
112
+ return np.array(
113
+ [ax / denom, ay / denom, gx / denom, gy / denom], dtype=np.float32
114
+ )
115
+
116
+ @staticmethod
117
+ def _manhattan(a: tuple[int, int], b: tuple[int, int]) -> int:
118
+ return abs(a[0] - b[0]) + abs(a[1] - b[1])
119
+
120
+ # --- gym API ---
121
+
122
+ def reset(self, seed: int | None = None, options: dict | None = None):
123
+ super().reset(seed=seed)
124
+ self.grid, self.start_pos, self.goal_pos = self._generate_grid()
125
+ self.agent_pos = self.start_pos
126
+ self.steps = 0
127
+ self.total_score = 0.0
128
+ self.last_reward = 0.0
129
+ self.last_action = None
130
+ self.last_rule = "New episode started. Agent begins on S."
131
+ self.visited = {self.start_pos}
132
+ self.terminated = False
133
+ self.truncated = False
134
+ return self._get_obs(), {}
135
+
136
+ def step(self, action: int):
137
+ if self.terminated or self.truncated:
138
+ return self._get_obs(), 0.0, self.terminated, self.truncated, {}
139
+
140
+ action = int(action)
141
+ self.steps += 1
142
+ self.last_action = action
143
+
144
+ dr, dc = ACTION_DELTAS[action]
145
+ nr, nc = self.agent_pos[0] + dr, self.agent_pos[1] + dc
146
+ n = self.grid_size
147
+
148
+ old_dist = self._manhattan(self.agent_pos, self.goal_pos)
149
+ reward = 0.0
150
+ rule_parts: list[str] = []
151
+
152
+ out_of_bounds = not (0 <= nr < n and 0 <= nc < n)
153
+ is_obstacle = (not out_of_bounds) and self.grid[nr, nc] == 1
154
+
155
+ if out_of_bounds or is_obstacle:
156
+ reward += -5.0
157
+ rule_parts.append(
158
+ "Invalid move: " + ("out of bounds" if out_of_bounds else "obstacle")
159
+ + " (-5.0)"
160
+ )
161
+ else:
162
+ self.agent_pos = (nr, nc)
163
+ new_dist = self._manhattan(self.agent_pos, self.goal_pos)
164
+ if new_dist < old_dist:
165
+ reward += 1.0
166
+ rule_parts.append("Closer to goal (+1.0)")
167
+ elif new_dist > old_dist:
168
+ reward += -0.5
169
+ rule_parts.append("Farther from goal (-0.5)")
170
+ else:
171
+ reward += -0.1
172
+ rule_parts.append("Same Manhattan distance (-0.1)")
173
+
174
+ if self.agent_pos not in self.visited:
175
+ reward += 0.3
176
+ rule_parts.append("New cell (+0.3)")
177
+ self.visited.add(self.agent_pos)
178
+
179
+ if self.agent_pos == self.goal_pos:
180
+ reward += 50.0
181
+ rule_parts.append("GOAL reached (+50.0)")
182
+ self.terminated = True
183
+
184
+ if not self.terminated and self.steps >= self.max_steps:
185
+ reward += -10.0
186
+ rule_parts.append("Step limit timeout (-10.0)")
187
+ self.truncated = True
188
+
189
+ self.last_reward = reward
190
+ self.total_score += reward
191
+ self.last_rule = "; ".join(rule_parts) + "."
192
+ return self._get_obs(), reward, self.terminated, self.truncated, {}
193
+
194
+
195
+ # ---------- Rendering ----------
196
+
197
+
198
+ def render_grid_html(env: WarehouseEnv) -> str:
199
+ n = env.grid_size
200
+ cell_size = max(26, min(56, 520 // n))
201
+ dot = int(cell_size * 0.6)
202
+ css = f"""
203
+ <style>
204
+ .wh-wrap {{ display: inline-block; }}
205
+ .wh-grid {{
206
+ display: grid;
207
+ grid-template-columns: repeat({n}, {cell_size}px);
208
+ grid-template-rows: repeat({n}, {cell_size}px);
209
+ gap: 1px;
210
+ background: #333;
211
+ padding: 1px;
212
+ border: 2px solid #222;
213
+ width: fit-content;
214
+ }}
215
+ .wh-cell {{
216
+ width: {cell_size}px;
217
+ height: {cell_size}px;
218
+ display: flex;
219
+ align-items: center;
220
+ justify-content: center;
221
+ font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
222
+ font-weight: 700;
223
+ font-size: {int(cell_size * 0.42)}px;
224
+ }}
225
+ .wh-empty {{ background: #f3f3f3; color: #cfcfcf; }}
226
+ .wh-obstacle {{ background: #2b3a55; color: #2b3a55; }}
227
+ .wh-start {{ background: #79b6ff; color: #003f8a; }}
228
+ .wh-goal {{ background: #6ee08a; color: #0a5022; }}
229
+ .wh-dot {{
230
+ width: {dot}px;
231
+ height: {dot}px;
232
+ border-radius: 50%;
233
+ background: #e63946;
234
+ border: 2px solid #7a1018;
235
+ box-shadow: 0 0 4px rgba(0,0,0,0.35);
236
+ }}
237
+ </style>
238
+ """
239
+ cells: list[str] = []
240
+ for r in range(n):
241
+ for c in range(n):
242
+ pos = (r, c)
243
+ if env.grid[r, c] == 1:
244
+ cls, label = "wh-obstacle", "X"
245
+ elif pos == env.start_pos:
246
+ cls, label = "wh-start", "S"
247
+ elif pos == env.goal_pos:
248
+ cls, label = "wh-goal", "G"
249
+ else:
250
+ cls, label = "wh-empty", "."
251
+ inner = '<div class="wh-dot"></div>' if pos == env.agent_pos else label
252
+ cells.append(f'<div class="wh-cell {cls}">{inner}</div>')
253
+ return css + f'<div class="wh-wrap"><div class="wh-grid">{"".join(cells)}</div></div>'
254
+
255
+
256
+ def render_scoreboard_md(env: WarehouseEnv) -> str:
257
+ if env.terminated:
258
+ status = "🏁 Goal reached!"
259
+ elif env.truncated:
260
+ status = "⏱️ Timed out"
261
+ else:
262
+ status = "🎮 Playing"
263
+ last_action = (
264
+ ACTION_NAMES[env.last_action] if env.last_action is not None else "None"
265
+ )
266
+ dist = WarehouseEnv._manhattan(env.agent_pos, env.goal_pos)
267
+ return f"""### Score Board
268
+
269
+ | Field | Value |
270
+ |---|---|
271
+ | **Total Score** | `{env.total_score:+.2f}` |
272
+ | **Last Reward** | `{env.last_reward:+.2f}` |
273
+ | **Steps** | `{env.steps} / {env.max_steps}` |
274
+ | **Agent Position** | `({env.agent_pos[0]}, {env.agent_pos[1]})` |
275
+ | **Goal Position** | `({env.goal_pos[0]}, {env.goal_pos[1]})` |
276
+ | **Manhattan Distance** | `{dist}` |
277
+ | **Status** | {status} |
278
+ | **Last Action** | `{last_action}` |
279
+ | **Rule Fired** | {env.last_rule} |
280
+ """
281
+
282
+
283
+ # ---------- Gradio app ----------
284
+
285
+
286
+ KEYBOARD_JS = """
287
+ () => {
288
+ if (window.__wh_kb_bound) return;
289
+ window.__wh_kb_bound = true;
290
+ document.addEventListener('keydown', (e) => {
291
+ const tag = (e.target && e.target.tagName) || '';
292
+ if (tag === 'INPUT' || tag === 'TEXTAREA' || tag === 'SELECT') return;
293
+ const map = {
294
+ 'ArrowUp': 'wh-btn-up',
295
+ 'ArrowRight': 'wh-btn-right',
296
+ 'ArrowDown': 'wh-btn-down',
297
+ 'ArrowLeft': 'wh-btn-left',
298
+ };
299
+ const id = map[e.key];
300
+ if (!id) return;
301
+ e.preventDefault();
302
+ const wrapper = document.getElementById(id);
303
+ if (!wrapper) return;
304
+ const btn = wrapper.querySelector('button') || wrapper;
305
+ btn.click();
306
+ });
307
+ }
308
+ """
309
+
310
+
311
+ def build_app() -> gr.Blocks:
312
+ initial_env = WarehouseEnv()
313
+ initial_env.reset(seed=42)
314
+
315
+ with gr.Blocks(title="Warehouse GridWorld") as demo:
316
+ gr.Markdown(
317
+ "# 📦 Warehouse GridWorld Game\n"
318
+ "Use the **arrow keys** (or buttons) to move the red agent from **S** to **G**. "
319
+ f"Obstacles re-randomize at **{int(OBSTACLE_DENSITY * 100)}%** density on every reset."
320
+ )
321
+
322
+ env_state = gr.State(initial_env)
323
+
324
+ with gr.Row():
325
+ with gr.Column(scale=3):
326
+ grid_html = gr.HTML(render_grid_html(initial_env))
327
+ with gr.Row():
328
+ up_btn = gr.Button("↑ Up", elem_id="wh-btn-up")
329
+ with gr.Row():
330
+ left_btn = gr.Button("← Left", elem_id="wh-btn-left")
331
+ down_btn = gr.Button("↓ Down", elem_id="wh-btn-down")
332
+ right_btn = gr.Button("→ Right", elem_id="wh-btn-right")
333
+ with gr.Column(scale=2):
334
+ grid_size_slider = gr.Slider(
335
+ minimum=3,
336
+ maximum=25,
337
+ value=DEFAULT_GRID_SIZE,
338
+ step=1,
339
+ label="Grid Size (resets on change)",
340
+ )
341
+ steps_progress = gr.Slider(
342
+ minimum=0,
343
+ maximum=MAX_STEPS,
344
+ value=0,
345
+ step=1,
346
+ label=f"Steps (0 / {MAX_STEPS})",
347
+ interactive=False,
348
+ )
349
+ reset_btn = gr.Button(
350
+ "🔁 Reset / Randomize Grid", variant="primary"
351
+ )
352
+ scoreboard = gr.Markdown(render_scoreboard_md(initial_env))
353
+
354
+ outputs = [env_state, grid_html, scoreboard, steps_progress]
355
+
356
+ def do_step(state: WarehouseEnv, action: int):
357
+ state.step(action)
358
+ return state, render_grid_html(state), render_scoreboard_md(state), state.steps
359
+
360
+ def do_reset(state: WarehouseEnv, new_size: float):
361
+ new_size = int(new_size)
362
+ if state is None or new_size != state.grid_size:
363
+ state = WarehouseEnv(grid_size=new_size)
364
+ state.reset()
365
+ return state, render_grid_html(state), render_scoreboard_md(state), state.steps
366
+
367
+ up_btn.click(lambda s: do_step(s, UP), inputs=env_state, outputs=outputs)
368
+ right_btn.click(lambda s: do_step(s, RIGHT), inputs=env_state, outputs=outputs)
369
+ down_btn.click(lambda s: do_step(s, DOWN), inputs=env_state, outputs=outputs)
370
+ left_btn.click(lambda s: do_step(s, LEFT), inputs=env_state, outputs=outputs)
371
+ reset_btn.click(do_reset, inputs=[env_state, grid_size_slider], outputs=outputs)
372
+ grid_size_slider.release(
373
+ do_reset, inputs=[env_state, grid_size_slider], outputs=outputs
374
+ )
375
+
376
+ demo.load(fn=None, inputs=None, outputs=None, js=KEYBOARD_JS)
377
+
378
+ return demo
379
+
380
+
381
+ if __name__ == "__main__":
382
+ app = build_app()
383
+ app.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=4.44.0
2
+ gymnasium>=0.29.1
3
+ numpy>=1.26.0