Spaces:

eaglelandsonce
/

test_Maze

Sleeping

App Files Files Community

test_Maze / app.py

eaglelandsonce

Create app.py

43fe5f9 verified about 1 month ago

raw

history blame contribute delete

19.4 kB

	import html
	from collections import deque

	import gradio as gr
	import gymnasium as gym
	import numpy as np
	from gymnasium import spaces


	# ============================================================
	# Gymnasium Environment: Warehouse GridWorld
	# ============================================================

	class WarehouseGridWorldEnv(gym.Env):
	    """
	    Warehouse GridWorld navigation environment.

	    The agent walks a square grid from a start cell to a goal cell while
	    avoiding obstacles. Positions are ``(row, col)`` tuples.

	    Grid symbols:
	    S = Start
	    G = Goal
	    X = Obstacle
	    . = Empty cell
	    A = Agent shown visually in the UI

	    Observation (float32, every component divided by ``grid_size - 1``):
	    [agent_row, agent_col, goal_row, goal_col]

	    Actions:
	    0 = UP
	    1 = RIGHT
	    2 = DOWN
	    3 = LEFT

	    Rewards:
	    -5.0 blocked by a wall or obstacle (agent stays in place)
	    +1.0 / -0.5 / -0.1 Manhattan distance to goal shrank / grew / unchanged
	    +0.3 first visit to a cell
	    +50.0 goal reached
	    -10.0 step budget exhausted
	    """

	    metadata = {"render_modes": ["human"]}

	    def __init__(self, grid_size=9, obstacle_density=0.20, max_steps=100, seed=None):
	        """
	        Configure the environment and generate the first episode.

	        Args:
	            grid_size: Side length of the square grid; must be at least 2.
	            obstacle_density: Fraction of the non-start/non-goal cells that
	                become obstacles. Values outside [0, 1] are clamped when the
	                obstacle count is computed.
	            max_steps: Number of steps after which the episode is truncated.
	            seed: Optional seed forwarded to ``reset``.

	        Raises:
	            ValueError: If ``grid_size`` is smaller than 2.
	        """
	        super().__init__()

	        self.grid_size = int(grid_size)
	        self.obstacle_density = float(obstacle_density)
	        self.max_steps = int(max_steps)

	        # A 1x1 grid has no cell for a goal distinct from the start, so the
	        # goal-placement loop in _generate_solvable_grid would never finish.
	        if self.grid_size < 2:
	            raise ValueError(
	                f"grid_size must be at least 2, got {self.grid_size}"
	            )

	        self.action_space = spaces.Discrete(4)

	        self.observation_space = spaces.Box(
	            low=0.0,
	            high=1.0,
	            shape=(4,),
	            dtype=np.float32,
	        )

	        # (row delta, col delta) applied to the agent position per action id.
	        self.action_map = {
	            0: (-1, 0),  # UP
	            1: (0, 1),  # RIGHT
	            2: (1, 0),  # DOWN
	            3: (0, -1),  # LEFT
	        }

	        self.action_names = {
	            0: "UP",
	            1: "RIGHT",
	            2: "DOWN",
	            3: "LEFT",
	        }

	        self.reset(seed=seed)

	    def reset(self, seed=None, options=None):
	        """
	        Start a new episode on a freshly generated grid.

	        Args:
	            seed: Optional seed passed on to ``gym.Env.reset``.
	            options: Accepted for API compatibility; not used here.

	        Returns:
	            Tuple ``(observation, info)`` describing the initial state.
	        """
	        super().reset(seed=seed)

	        self.grid, self.start, self.goal = self._generate_solvable_grid()

	        self.agent_pos = self.start
	        self.steps = 0
	        self.total_score = 0.0
	        self.last_reward = 0.0
	        self.done = False
	        self.status = "Playing"
	        self.last_action = "None"
	        self.rule_fired = "New episode started. Agent begins on S."

	        # Cells already stepped on; used for the new-cell bonus.
	        self.visited = {self.start}

	        return self._get_obs(), self._get_info()

	    def step(self, action):
	        """
	        Apply one action and score it.

	        Args:
	            action: Action id 0-3 (converted with ``int``).

	        Returns:
	            Tuple ``(observation, reward, terminated, truncated, info)``.
	            Once the episode is finished, further calls change nothing except
	            ``last_reward``/``rule_fired`` and return a reward of 0.0.

	        Raises:
	            KeyError: If ``action`` is not one of the four known ids. The step
	                counter is left untouched in that case.
	        """
	        if self.done:
	            self.last_reward = 0.0
	            self.rule_fired = "Episode already finished. Press reset to play again."
	            return self._get_obs(), 0.0, True, False, self._get_info()

	        action = int(action)
	        # Resolve the action before mutating episode state so an unknown id
	        # fails without consuming a step.
	        dr, dc = self.action_map[action]

	        self.steps += 1
	        self.last_action = self.action_names[action]

	        old_pos = self.agent_pos
	        old_distance = self._manhattan_distance(old_pos, self.goal)
	        new_pos = (old_pos[0] + dr, old_pos[1] + dc)

	        reward = 0.0
	        terminated = False
	        truncated = False

	        if not self._is_valid_position(new_pos):
	            reward = -5.0
	            self.rule_fired = "-5.0 wall/obstacle penalty. Agent stays in place."
	        else:
	            self.agent_pos = new_pos
	            new_distance = self._manhattan_distance(new_pos, self.goal)

	            if new_distance < old_distance:
	                reward += 1.0
	                self.rule_fired = "+1.0 moved closer to goal."
	            elif new_distance > old_distance:
	                reward -= 0.5
	                self.rule_fired = "-0.5 moved farther from goal."
	            else:
	                reward -= 0.1
	                self.rule_fired = "-0.1 same Manhattan distance."

	            if new_pos not in self.visited:
	                reward += 0.3
	                self.rule_fired += " +0.3 new-cell bonus."

	            self.visited.add(new_pos)

	            if new_pos == self.goal:
	                reward += 50.0
	                terminated = True
	                self.done = True
	                self.status = "Goal Reached"
	                self.rule_fired += " +50.0 goal reached!"

	        # Reaching the goal on the last allowed step counts as success,
	        # not as a timeout.
	        if not terminated and self.steps >= self.max_steps:
	            reward -= 10.0
	            truncated = True
	            self.done = True
	            self.status = "Timeout"
	            self.rule_fired += " -10.0 timeout penalty."

	        self.last_reward = reward
	        self.total_score += reward

	        return self._get_obs(), reward, terminated, truncated, self._get_info()

	    def _get_obs(self):
	        """Return ``[agent_row, agent_col, goal_row, goal_col]`` scaled by ``grid_size - 1``."""
	        denominator = max(1, self.grid_size - 1)

	        agent_row, agent_col = self.agent_pos
	        goal_row, goal_col = self.goal

	        return np.array(
	            [
	                agent_row / denominator,
	                agent_col / denominator,
	                goal_row / denominator,
	                goal_col / denominator,
	            ],
	            dtype=np.float32,
	        )

	    def _get_info(self):
	        """Return a dict snapshot of score, step count, positions and status text."""
	        return {
	            "total_score": self.total_score,
	            "last_reward": self.last_reward,
	            "steps": self.steps,
	            "agent_position": self.agent_pos,
	            "goal_position": self.goal,
	            "manhattan_distance": self._manhattan_distance(self.agent_pos, self.goal),
	            "status": self.status,
	            "last_action": self.last_action,
	            "rule_fired": self.rule_fired,
	            "goal_reached": self.agent_pos == self.goal,
	        }

	    def _generate_solvable_grid(self):
	        """
	        Randomize start, goal, and obstacles at roughly ``obstacle_density``.

	        Tries up to 500 random layouts and returns the first one that has a
	        path from S to G; if none qualifies, returns ``_fallback_grid()``.

	        Returns:
	            Tuple ``(grid, start, goal)``.
	        """
	        for _ in range(500):
	            grid = np.full((self.grid_size, self.grid_size), ".", dtype="<U1")

	            start = self._random_cell()
	            goal = self._random_cell()

	            # Keep the goal away from the start so the episode is not trivial.
	            while goal == start or self._manhattan_distance(start, goal) < self.grid_size // 2:
	                goal = self._random_cell()

	            available_cells = [
	                (r, c)
	                for r in range(self.grid_size)
	                for c in range(self.grid_size)
	                if (r, c) not in {start, goal}
	            ]

	            obstacle_count = int(len(available_cells) * self.obstacle_density)
	            # Sampling without replacement needs 0 <= size <= population,
	            # so out-of-range densities are clamped instead of raising.
	            obstacle_count = max(0, min(obstacle_count, len(available_cells)))

	            obstacle_indices = self.np_random.choice(
	                len(available_cells),
	                size=obstacle_count,
	                replace=False,
	            )

	            for index in obstacle_indices:
	                r, c = available_cells[index]
	                grid[r, c] = "X"

	            grid[start] = "S"
	            grid[goal] = "G"

	            if self._path_exists(grid, start, goal):
	                return grid, start, goal

	        return self._fallback_grid()

	    def _fallback_grid(self):
	        """
	        Safety fallback in case random generation fails.

	        Places S in the top-left corner and G in the bottom-right corner and
	        keeps the first column and last row free of obstacles, which
	        guarantees a path between them.

	        Returns:
	            Tuple ``(grid, start, goal)``.
	        """
	        grid = np.full((self.grid_size, self.grid_size), ".", dtype="<U1")

	        start = (0, 0)
	        goal = (self.grid_size - 1, self.grid_size - 1)

	        # Down the first column, then along the last row.
	        safe_path = set()

	        for r in range(self.grid_size):
	            safe_path.add((r, 0))

	        for c in range(self.grid_size):
	            safe_path.add((self.grid_size - 1, c))

	        available_cells = [
	            (r, c)
	            for r in range(self.grid_size)
	            for c in range(self.grid_size)
	            if (r, c) not in safe_path and (r, c) not in {start, goal}
	        ]

	        obstacle_count = int((self.grid_size * self.grid_size - 2) * self.obstacle_density)
	        obstacle_count = max(0, min(obstacle_count, len(available_cells)))

	        obstacle_indices = self.np_random.choice(
	            len(available_cells),
	            size=obstacle_count,
	            replace=False,
	        )

	        for index in obstacle_indices:
	            r, c = available_cells[index]
	            grid[r, c] = "X"

	        grid[start] = "S"
	        grid[goal] = "G"

	        return grid, start, goal

	    def _random_cell(self):
	        """Draw a uniformly random ``(row, col)`` inside the grid."""
	        row = int(self.np_random.integers(0, self.grid_size))
	        col = int(self.np_random.integers(0, self.grid_size))
	        return row, col

	    def _path_exists(self, grid, start, goal):
	        """Breadth-first search: True if ``goal`` is reachable from ``start`` without crossing "X"."""
	        queue = deque([start])
	        seen = {start}

	        while queue:
	            current = queue.popleft()

	            if current == goal:
	                return True

	            for dr, dc in self.action_map.values():
	                nr = current[0] + dr
	                nc = current[1] + dc
	                next_pos = (nr, nc)

	                if (
	                    0 <= nr < self.grid_size
	                    and 0 <= nc < self.grid_size
	                    and next_pos not in seen
	                    and grid[nr, nc] != "X"
	                ):
	                    seen.add(next_pos)
	                    queue.append(next_pos)

	        return False

	    def _is_valid_position(self, pos):
	        """Return True if ``pos`` is inside the grid and not an obstacle."""
	        row, col = pos

	        if row < 0 or row >= self.grid_size:
	            return False

	        if col < 0 or col >= self.grid_size:
	            return False

	        if self.grid[row, col] == "X":
	            return False

	        return True

	    def _manhattan_distance(self, pos_a, pos_b):
	        """Return ``|row_a - row_b| + |col_a - col_b|``."""
	        return abs(pos_a[0] - pos_b[0]) + abs(pos_a[1] - pos_b[1])


	# ============================================================
	# HTML Rendering
	# ============================================================

	def render_grid(env):
	    """
	    Build the HTML table that draws the warehouse grid.

	    Reads ``env.grid_size``, ``env.grid`` (indexed as ``grid[row, col]``) and
	    ``env.agent_pos``. Each cell gets a CSS class chosen by its symbol; the
	    cell under the agent shows the agent marker instead of its own label.

	    Returns:
	        An HTML string: the table wrapped in a ``grid-panel`` div.
	    """
	    # Symbol -> (CSS class, visible label). Any other symbol is an empty cell.
	    symbol_styles = {
	        "S": ("cell-start", "S"),
	        "G": ("cell-goal", "G"),
	        "X": ("cell-obstacle", "X"),
	    }
	    empty_style = ("cell-empty", "")

	    size = env.grid_size
	    row_markup = []

	    for row_idx in range(size):
	        cell_markup = []

	        for col_idx in range(size):
	            symbol = str(env.grid[row_idx, col_idx])
	            css_class, text = symbol_styles.get(symbol, empty_style)

	            # The agent marker replaces the label but keeps the cell colour.
	            if (row_idx, col_idx) == env.agent_pos:
	                text = "<div class='agent-circle'>A</div>"

	            cell_markup.append(
	                f"<td class='warehouse-cell {css_class}'>{text}</td>"
	            )

	        row_markup.append("<tr>" + "".join(cell_markup) + "</tr>")

	    table = "<table class='warehouse-grid'>" + "".join(row_markup) + "</table>"

	    return f"""
	<div class="grid-panel">
	{table}
	</div>
	"""


	def render_scoreboard(env):
	    """
	    Build the HTML score card shown next to the grid.

	    Pulls the current values from ``env._get_info()`` and ``env.max_steps``.
	    The status, last action and rule text are HTML-escaped before being
	    inserted; the static reward-rule legend is appended at the bottom.

	    Returns:
	        An HTML string containing the ``score-card`` div.
	    """
	    info = env._get_info()

	    agent_row, agent_col = info["agent_position"]
	    goal_row, goal_col = info["goal_position"]

	    if info["goal_reached"]:
	        goal_text = "Yes"
	    else:
	        goal_text = "No"

	    total_score = info["total_score"]
	    last_reward = info["last_reward"]
	    steps_taken = info["steps"]
	    step_limit = env.max_steps
	    distance = info["manhattan_distance"]

	    status_html = html.escape(info["status"])
	    action_html = html.escape(info["last_action"])
	    rule_html = html.escape(info["rule_fired"])

	    return f"""
	<div class="score-card">
	<h3>Score Board</h3>

	<div class="metric-grid">
	<div class="metric-box">
	<div class="metric-label">Total Score</div>
	<div class="metric-value">{total_score:.1f}</div>
	</div>

	<div class="metric-box">
	<div class="metric-label">Last Reward</div>
	<div class="metric-value">{last_reward:+.1f}</div>
	</div>

	<div class="metric-box">
	<div class="metric-label">Steps</div>
	<div class="metric-value">{steps_taken} / {step_limit}</div>
	</div>

	<div class="metric-box">
	<div class="metric-label">Manhattan Distance</div>
	<div class="metric-value">{distance}</div>
	</div>
	</div>

	<div class="detail-line"><b>Agent Position:</b> ({agent_row}, {agent_col})</div>
	<div class="detail-line"><b>Goal Position:</b> ({goal_row}, {goal_col})</div>
	<div class="detail-line"><b>Goal Reached:</b> {goal_text}</div>
	<div class="detail-line"><b>Status:</b> {status_html}</div>
	<div class="detail-line"><b>Last Action:</b> {action_html}</div>

	<div class="rule-box">
	<b>Rule Fired:</b><br>
	{rule_html}
	</div>

	<hr>

	<h4>Reward Rules</h4>
	<ul class="reward-list">
	<li><code>-5.0</code> wall/obstacle</li>
	<li><code>+1.0</code> closer to goal</li>
	<li><code>-0.5</code> farther from goal</li>
	<li><code>-0.1</code> same distance</li>
	<li><code>+0.3</code> new-cell bonus</li>
	<li><code>+50.0</code> goal reached</li>
	<li><code>-10.0</code> timeout</li>
	</ul>
	</div>
	"""


	# ============================================================
	# Gradio Event Functions
	# ============================================================

	def new_game(grid_size):
	    """
	    Create a brand-new environment for the requested grid size.

	    Args:
	        grid_size: Side length from the UI slider (converted with ``int``).

	    Returns:
	        Tuple ``(env, grid_html, scoreboard_html)`` for the three Gradio outputs.
	    """
	    side = int(grid_size)
	    fresh_env = WarehouseGridWorldEnv(
	        grid_size=side,
	        obstacle_density=0.20,
	        max_steps=100,
	    )

	    grid_html = render_grid(fresh_env)
	    scoreboard_html = render_scoreboard(fresh_env)

	    return fresh_env, grid_html, scoreboard_html


	def move_agent(env, action):
	    """
	    Apply one action to the stored environment and re-render the UI.

	    Args:
	        env: The current environment, or ``None`` if no game exists yet, in
	            which case a default 9x9 environment is created first.
	        action: Action id passed to ``env.step``.

	    Returns:
	        Tuple ``(env, grid_html, scoreboard_html)`` for the three Gradio outputs.
	    """
	    active_env = env
	    if active_env is None:
	        active_env = WarehouseGridWorldEnv(grid_size=9, obstacle_density=0.20, max_steps=100)

	    # The step result is not needed here; the renderers read the env state.
	    active_env.step(action)

	    grid_html = render_grid(active_env)
	    scoreboard_html = render_scoreboard(active_env)

	    return active_env, grid_html, scoreboard_html


	def move_up(env):
	    """Click handler for the Up button: step the agent with action 0 (UP)."""
	    up_action = 0
	    return move_agent(env, up_action)


	def move_right(env):
	    """Click handler for the Right button: step the agent with action 1 (RIGHT)."""
	    right_action = 1
	    return move_agent(env, right_action)


	def move_down(env):
	    """Click handler for the Down button: step the agent with action 2 (DOWN)."""
	    down_action = 2
	    return move_agent(env, down_action)


	def move_left(env):
	    """Click handler for the Left button: step the agent with action 3 (LEFT)."""
	    left_action = 3
	    return move_agent(env, left_action)


	# ============================================================
	# Styling and Keyboard Script
	# ============================================================

	# Stylesheet passed to gr.Blocks(css=...). It styles the classes emitted by
	# render_grid (grid-panel, warehouse-grid, warehouse-cell, cell-*, agent-circle)
	# and render_scoreboard (score-card, metric-*, detail-line, rule-box,
	# reward-list), plus the subtitle / control-note paragraphs in the layout.
	APP_CSS = """
	body {
	background: #f7f9fc;
	}

	.main-title {
	text-align: center;
	margin-bottom: 0px;
	}

	.subtitle {
	text-align: center;
	color: #555;
	margin-top: 0px;
	}

	.grid-panel {
	display: flex;
	justify-content: center;
	align-items: center;
	padding: 12px;
	}

	.warehouse-grid {
	border-collapse: collapse;
	border: 3px solid #2f4858;
	background: white;
	}

	.warehouse-cell {
	width: 42px;
	height: 42px;
	border: 2px solid #607d8b;
	text-align: center;
	vertical-align: middle;
	font-weight: 800;
	font-size: 16px;
	font-family: Arial, sans-serif;
	}

	.cell-start {
	background: #b9d7ff;
	color: #0b3d91;
	}

	.cell-goal {
	background: #2ecc71;
	color: #063b1d;
	}

	.cell-obstacle {
	background: #2f3e46;
	color: #dce3e8;
	}

	.cell-empty {
	background: #edf6fb;
	color: #607d8b;
	}

	.agent-circle {
	width: 30px;
	height: 30px;
	background: #e53935;
	color: white;
	border-radius: 999px;
	display: flex;
	align-items: center;
	justify-content: center;
	margin: auto;
	font-size: 15px;
	font-weight: 900;
	box-shadow: 0 2px 5px rgba(0,0,0,0.35);
	}

	.score-card {
	background: white;
	border: 1px solid #d9e2ec;
	border-radius: 14px;
	padding: 16px;
	box-shadow: 0 2px 8px rgba(0,0,0,0.06);
	}

	.score-card h3 {
	margin-top: 0px;
	}

	.metric-grid {
	display: grid;
	grid-template-columns: 1fr 1fr;
	gap: 10px;
	}

	.metric-box {
	background: #f1f5f9;
	border-radius: 10px;
	padding: 10px;
	}

	.metric-label {
	color: #64748b;
	font-size: 12px;
	}

	.metric-value {
	color: #111827;
	font-size: 20px;
	font-weight: 800;
	}

	.detail-line {
	margin-top: 8px;
	font-size: 14px;
	}

	.rule-box {
	margin-top: 12px;
	padding: 10px;
	background: #fff7ed;
	border-left: 5px solid #fb923c;
	border-radius: 8px;
	font-size: 14px;
	}

	.reward-list {
	margin-top: 6px;
	}

	.reward-list li {
	margin-bottom: 5px;
	}

	.reward-list code {
	background: #e5e7eb;
	padding: 3px 6px;
	border-radius: 6px;
	font-weight: 800;
	}

	.control-note {
	text-align: center;
	color: #555;
	font-size: 14px;
	}

	button {
	font-weight: 700 !important;
	}
	"""

	# Script injected into the page <head> via gr.Blocks(head=...). It maps the
	# arrow keys to clicks on the elements with ids up_btn / right_btn / down_btn /
	# left_btn, and ignores key presses that originate in INPUT, TEXTAREA or
	# SELECT elements. The window-level flag keeps the listener from being
	# registered twice, since bindKeys is invoked both immediately and again
	# after a one-second timeout.
	#
	# The logical-OR operators must be plain "||": the previous "\|\|" spelling
	# put literal backslashes into the JavaScript (a syntax error, so no key
	# binding was ever installed) and is also an invalid Python escape sequence.
	KEYBOARD_SCRIPT = """
	<script>
	(function () {
	function clickButton(id) {
	const container = document.getElementById(id);
	if (!container) return;

	const button = container.querySelector("button") || container;
	if (button) button.click();
	}

	function bindKeys() {
	if (window.__warehouse_gridworld_keys_bound) return;
	window.__warehouse_gridworld_keys_bound = true;

	document.addEventListener("keydown", function (event) {
	const tag = event.target.tagName;

	if (tag === "INPUT" || tag === "TEXTAREA" || tag === "SELECT") {
	return;
	}

	if (event.key === "ArrowUp") {
	event.preventDefault();
	clickButton("up_btn");
	}

	if (event.key === "ArrowRight") {
	event.preventDefault();
	clickButton("right_btn");
	}

	if (event.key === "ArrowDown") {
	event.preventDefault();
	clickButton("down_btn");
	}

	if (event.key === "ArrowLeft") {
	event.preventDefault();
	clickButton("left_btn");
	}
	});
	}

	if (document.readyState === "loading") {
	document.addEventListener("DOMContentLoaded", bindKeys);
	} else {
	bindKeys();
	}

	setTimeout(bindKeys, 1000);
	})();
	</script>
	"""


	# ============================================================
	# Gradio App
	# ============================================================

	# Build the Gradio UI. The page receives the custom stylesheet (APP_CSS) and
	# the arrow-key script (KEYBOARD_SCRIPT) through the css/head arguments.
	# NOTE(review): nesting indentation is missing in this copy of the file; the
	# `with` blocks below presumably nest as Blocks > Row > Column, with the event
	# wiring inside the Blocks context. Restore it before running.
	with gr.Blocks(
	title="Warehouse GridWorld Game",
	css=APP_CSS,
	head=KEYBOARD_SCRIPT,
	) as demo:

	# Holds the environment object that every handler below receives as input
	# and returns as its first output.
	env_state = gr.State()

	gr.Markdown(
	"""
	# 🏗️ Warehouse GridWorld Game

	<p class="subtitle">
	Use the keyboard arrow keys or the on-screen buttons to move the red agent from S to G.
	Obstacles are randomized at approximately 20% density on every reset.
	</p>
	"""
	)

	# Wider column (scale=2): grid display, control hint, direction buttons.
	with gr.Row():
	with gr.Column(scale=2):
	# Filled with the HTML returned by render_grid.
	grid_output = gr.HTML()

	gr.Markdown(
	"""
	<p class="control-note">
	Controls: ↑ ↓ ← → arrow keys, or use the buttons below.
	</p>
	"""
	)

	# The elem_id values must match the ids KEYBOARD_SCRIPT passes to
	# clickButton, otherwise the arrow keys find no button to click.
	with gr.Row():
	up_btn = gr.Button("↑ Up", elem_id="up_btn")

	with gr.Row():
	left_btn = gr.Button("← Left", elem_id="left_btn")
	down_btn = gr.Button("↓ Down", elem_id="down_btn")
	right_btn = gr.Button("→ Right", elem_id="right_btn")

	# Narrower column (scale=1): grid-size slider, reset button, score board.
	with gr.Column(scale=1):
	grid_size = gr.Slider(
	minimum=5,
	maximum=15,
	value=9,
	step=1,
	label="Grid Size",
	)

	reset_btn = gr.Button("🔄 Reset / Randomize Grid", variant="primary")

	# Filled with the HTML returned by render_scoreboard.
	scoreboard_output = gr.HTML()

	# Reset: build a new environment using the current slider value.
	reset_btn.click(
	fn=new_game,
	inputs=[grid_size],
	outputs=[env_state, grid_output, scoreboard_output],
	)

	# Direction buttons: each handler takes the stored environment and returns
	# the environment plus freshly rendered grid and score board HTML.
	up_btn.click(
	fn=move_up,
	inputs=[env_state],
	outputs=[env_state, grid_output, scoreboard_output],
	)

	right_btn.click(
	fn=move_right,
	inputs=[env_state],
	outputs=[env_state, grid_output, scoreboard_output],
	)

	down_btn.click(
	fn=move_down,
	inputs=[env_state],
	outputs=[env_state, grid_output, scoreboard_output],
	)

	left_btn.click(
	fn=move_left,
	inputs=[env_state],
	outputs=[env_state, grid_output, scoreboard_output],
	)

	# Create the first game when the app loads so the grid and score board are
	# populated before any button is pressed.
	demo.load(
	fn=new_game,
	inputs=[grid_size],
	outputs=[env_state, grid_output, scoreboard_output],
	)


	# Launch the Gradio app only when this file is executed as a script.
	# NOTE(review): the call below needs to be indented under the `if` once the
	# file's indentation is restored.
	if __name__ == "__main__":
	demo.launch()