# RL / app.py
# stevafernandes's picture
# Update app.py
# 79a885a verified
import gymnasium as gym
import numpy as np
import gradio as gr
import io
import contextlib
def run_demo():
    """Run the FrozenLake walkthrough and return everything it prints as one string.

    Builds a non-slippery 4x4 ``FrozenLake-v1`` environment and prints four
    sections: the state/action counts, the emoji grid, a zero-initialised
    Q table, and one episode of at most 15 randomly sampled actions.

    Returns:
        str: The captured text of the whole walkthrough.
    """
    action_names = ["LEFT", "DOWN", "RIGHT", "UP"]
    tile_emoji = {'S': '🏁', 'F': '🧊', 'H': '🕳️', 'G': '🎯'}
    max_random_steps = 15

    buf = io.StringIO()
    # NOTE: redirect_stdout swaps sys.stdout for the whole process while active,
    # so anything else printed during the run also lands in ``buf``.
    with contextlib.redirect_stdout(buf):
        env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False)
        try:
            n_states, n_actions = env.observation_space.n, env.action_space.n

            # Read the layout from the environment itself so the picture always
            # matches the tiles the agent really walks on (a hand-copied map can
            # silently put holes in the wrong place).
            grid_map = [
                "".join(cell.decode("utf-8") for cell in row)
                for row in env.unwrapped.desc.tolist()
            ]

            def render_grid(state=None):
                """Print the grid, drawing the agent on tile number ``state`` if given."""
                for r, row in enumerate(grid_map):
                    n_cols = len(row)  # states are numbered row-major
                    print("".join(
                        " 🤖 " if r * n_cols + c == state else f" {tile_emoji[t]} "
                        for c, t in enumerate(row)
                    ))

            # STEP 1: The Game
            print(f"STEP 1: FrozenLake\nStates: {n_states}, Actions: {n_actions} ({', '.join(action_names)})\n")

            # STEP 2: The Grid
            print("STEP 2: The Grid")
            render_grid()
            print("Legend: 🏁 Start 🧊 Frozen 🕳️ Hole 🎯 Goal 🤖 Agent\n")

            # STEP 3: The Q Table (all zeros: nothing has been learned yet)
            Q = np.zeros((n_states, n_actions))
            print("STEP 3: Q Table")
            print(f"{'State':<7}" + "".join(f"{a:<8}" for a in action_names))
            for s in range(n_states):
                print(f"{s:<7}" + "".join(f"{Q[s, a]:<8.2f}" for a in range(n_actions)))
            print()

            # STEP 4: Random Episode
            print("STEP 4: Random Episode")
            state, _ = env.reset(seed=3)
            render_grid(state)
            print()
            for t in range(max_random_steps):
                action = env.action_space.sample()
                state, reward, terminated, truncated, _ = env.step(action)
                print(f"Step {t + 1}: {action_names[action]}, Reward: {reward}")
                render_grid(state)
                if terminated:
                    # A terminal tile is either the goal (positive reward) or a hole.
                    print("🎉 GOAL!" if reward > 0 else "💀 HOLE!")
                    break
                if truncated:
                    # Cut off without reaching a terminal tile: not a hole.
                    print("Episode truncated.")
                    break
        finally:
            # Release the environment even if a step above raised.
            env.close()
    return buf.getvalue()
# Gradio front end: an interface with no inputs that calls run_demo and shows
# the returned text in a 40-line textbox.
demo = gr.Interface(
    title="Q Learning Part 1: FrozenLake Demo",
    description="Click Submit to run one random episode on the FrozenLake grid.",
    fn=run_demo,
    inputs=[],
    outputs=gr.Textbox(lines=40, label="Output"),
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()