Spaces:

stevafernandes
/

RL

Sleeping

App Files Files Community

RL / app.py

stevafernandes

Update app.py

79a885a verified 28 days ago

raw

history blame contribute delete

2.2 kB

	import gymnasium as gym
	import numpy as np
	import gradio as gr
	import io
	import contextlib

	def run_demo() -> str:
	    """Run the FrozenLake walkthrough and return its transcript as text.

	    The transcript has four sections: a summary of the state and action
	    counts, a rendering of the 4x4 grid, an all-zero Q table, and one
	    episode of up to 15 randomly sampled actions that stops early when
	    the environment reports termination or truncation.

	    Returns:
	        Everything the demo printed, as a single string.
	    """
	    buf = io.StringIO()

	    def emit(*parts):
	        """Print to this call's private buffer instead of sys.stdout."""
	        # contextlib.redirect_stdout swaps sys.stdout for the whole process,
	        # so overlapping calls (e.g. concurrent web requests) could write
	        # into each other's buffers. Passing file= keeps each call isolated.
	        print(*parts, file=buf)

	    action_names = ["LEFT", "DOWN", "RIGHT", "UP"]
	    grid_map = ['SFFF', 'FHFH', 'FFFF', 'FHFG']
	    tile_emoji = {'S': '🏁', 'F': '🧊', 'H': '🕳️', 'G': '🎯'}
	    n_cols = len(grid_map[0])
	    max_steps = 15

	    def render_grid(state=None):
	        """Emit the grid row by row, marking the cell at index `state`."""
	        for r, row in enumerate(grid_map):
	            # States are numbered row-major, so cell (r, c) is r * n_cols + c.
	            emit("".join(
	                " 🤖 " if r * n_cols + c == state else f" {tile_emoji[t]} "
	                for c, t in enumerate(row)
	            ))

	    env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False)
	    try:
	        n_states, n_actions = env.observation_space.n, env.action_space.n

	        # STEP 1: The Game
	        emit(f"STEP 1: FrozenLake\nStates: {n_states}, Actions: {n_actions} ({', '.join(action_names)})\n")

	        # STEP 2: The Grid
	        emit("STEP 2: The Grid")
	        render_grid()
	        emit("Legend: 🏁 Start 🧊 Frozen 🕳️ Hole 🎯 Goal 🤖 Agent\n")

	        # STEP 3: The Q Table (one row per state, one column per action)
	        Q = np.zeros((n_states, n_actions))
	        emit("STEP 3: Q Table")
	        emit(f"{'State':<7}" + "".join(f"{a:<8}" for a in action_names))
	        for s in range(n_states):
	            emit(f"{s:<7}" + "".join(f"{Q[s, a]:<8.2f}" for a in range(n_actions)))
	        emit()

	        # STEP 4: Random Episode
	        emit("STEP 4: Random Episode")
	        # The seed is passed to the environment only; actions come from
	        # env.action_space.sample(), which is not seeded here.
	        state, _ = env.reset(seed=3)
	        render_grid(state)
	        emit()

	        for t in range(max_steps):
	            action = env.action_space.sample()
	            state, reward, terminated, truncated, _ = env.step(action)
	            emit(f"Step {t + 1}: {action_names[action]}, Reward: {reward}")
	            render_grid(state)
	            if terminated or truncated:
	                emit("🎉 GOAL!" if reward > 0 else "💀 HOLE!")
	                break
	    finally:
	        # Release the environment even if a step above raised.
	        env.close()

	    return buf.getvalue()

	# Gradio UI: no inputs, one text box that shows whatever run_demo() returns.
	demo = gr.Interface(
	    title="Q Learning Part 1: FrozenLake Demo",
	    description="Click Submit to run one random episode on the FrozenLake grid.",
	    fn=run_demo,
	    inputs=[],
	    outputs=gr.Textbox(lines=40, label="Output"),
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()