# Page-scrape header: "Spaces: Sleeping", file size 2,201 bytes (commit-hash and line-number gutters dropped).
import gymnasium as gym
import numpy as np
import gradio as gr
import io
import contextlib
def run_demo():
    """Run the FrozenLake walkthrough and return everything it printed.

    The walkthrough prints four sections: a summary of the environment's
    state/action counts, the grid with a legend, an all-zero Q table, and one
    episode of up to 15 randomly sampled actions starting from
    ``env.reset(seed=3)``.

    Returns:
        str: The captured stdout text of the whole walkthrough.
    """
    # Index i names action i; the order follows the FrozenLake action
    # encoding (0=LEFT, 1=DOWN, 2=RIGHT, 3=UP) from the Gymnasium docs.
    action_names = ["LEFT", "DOWN", "RIGHT", "UP"]
    # NOTE(review): the emoji literals were mis-encoded in the copy this was
    # restored from. Frozen/Hole/Goal/Agent were recoverable from the bytes;
    # the Start tile and the GOAL/HOLE banner glyphs are reconstructions —
    # confirm against the upstream file.
    tile_emoji = {"S": "🏁", "F": "🧊", "H": "🕳️", "G": "🎯"}
    agent_cell = " 🤖 "

    buf = io.StringIO()
    with contextlib.redirect_stdout(buf):
        env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False)
        try:
            n_states, n_actions = env.observation_space.n, env.action_space.n

            # Read the layout from the environment instead of hard-coding it,
            # so the rendered holes always match the cells that actually
            # terminate the episode. `desc` is a 2-D array of single bytes.
            grid_map = [b"".join(row).decode("ascii") for row in env.unwrapped.desc]

            def render_grid(state=None):
                """Print the grid, drawing the agent on the cell for `state`."""
                for r, row in enumerate(grid_map):
                    n_cols = len(row)  # states are numbered row-major
                    print("".join(
                        agent_cell if r * n_cols + c == state else f" {tile_emoji[t]} "
                        for c, t in enumerate(row)
                    ))

            # STEP 1: The Game
            print(f"STEP 1: FrozenLake\nStates: {n_states}, Actions: {n_actions} ({', '.join(action_names)})\n")

            # STEP 2: The Grid
            print("STEP 2: The Grid")
            render_grid()
            print("Legend: 🏁 Start 🧊 Frozen 🕳️ Hole 🎯 Goal 🤖 Agent\n")

            # STEP 3: The Q Table (all zeros: nothing has been learned yet)
            Q = np.zeros((n_states, n_actions))
            print("STEP 3: Q Table")
            print(f"{'State':<7}" + "".join(f"{a:<8}" for a in action_names))
            for s in range(n_states):
                print(f"{s:<7}" + "".join(f"{Q[s, a]:<8.2f}" for a in range(n_actions)))
            print()

            # STEP 4: Random Episode
            print("STEP 4: Random Episode")
            state, _ = env.reset(seed=3)
            render_grid(state)
            print()
            for t in range(15):
                action = env.action_space.sample()
                state, reward, terminated, truncated, _ = env.step(action)
                print(f"Step {t + 1}: {action_names[action]}, Reward: {reward}")
                render_grid(state)
                if terminated or truncated:
                    print("🎉 GOAL!" if reward > 0 else "💀 HOLE!")
                    break
        finally:
            # Release the environment even if a step or a print raised.
            env.close()
    return buf.getvalue()
# Gradio UI: no input widgets, a single text box showing whatever
# run_demo() returns.
demo = gr.Interface(
    title="Q Learning Part 1: FrozenLake Demo",
    description="Click Submit to run one random episode on the FrozenLake grid.",
    fn=run_demo,
    inputs=[],  # run_demo takes no arguments
    outputs=gr.Textbox(lines=40, label="Output"),
)
# Start the Gradio server only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    demo.launch()