"""Gradio demo that walks through the FrozenLake environment step by step.

The demo prints the state/action counts, draws the 4x4 grid, shows a
zero-initialised Q table and finally plays one episode with randomly sampled
actions. Everything printed is captured and returned as a single string so
it can be displayed in a Gradio textbox.
"""

import contextlib
import io

import gradio as gr
import gymnasium as gym
import numpy as np


def run_demo() -> str:
    """Run the FrozenLake walkthrough and return everything it printed.

    Returns:
        The captured standard output of the four demo steps (game summary,
        grid, Q table, one random episode of at most 15 steps).
    """
    buf = io.StringIO()
    # All output below goes through print(); redirecting stdout lets the
    # whole transcript be handed back to the caller as one string.
    with contextlib.redirect_stdout(buf):
        env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False)
        # try/finally guarantees the environment is closed even when
        # reset()/step() raises part-way through the demo.
        try:
            n_states, n_actions = env.observation_space.n, env.action_space.n
            action_names = ["LEFT", "DOWN", "RIGHT", "UP"]
            grid_map = ['SFFF', 'FHFH', 'FFFF', 'FHFG']
            tile_emoji = {'S': '🏁', 'F': '🧊', 'H': '🕳️', 'G': '🎯'}

            def render_grid(state=None) -> None:
                """Print the grid, drawing the agent on the tile for *state*.

                With ``state=None`` no tile matches, so only the map tiles
                are drawn.
                """
                for r, row in enumerate(grid_map):
                    # States are numbered row-major, so the flat index of
                    # (r, c) is r * row_width + c.
                    width = len(row)
                    print("".join(
                        " 🤖 " if r * width + c == state else f" {tile_emoji[t]} "
                        for c, t in enumerate(row)
                    ))

            # STEP 1: The Game
            print(f"STEP 1: FrozenLake\nStates: {n_states}, Actions: {n_actions} ({', '.join(action_names)})\n")

            # STEP 2: The Grid
            print("STEP 2: The Grid")
            render_grid()
            print("Legend: 🏁 Start 🧊 Frozen 🕳️ Hole 🎯 Goal 🤖 Agent\n")

            # STEP 3: The Q Table
            Q = np.zeros((n_states, n_actions))
            print("STEP 3: Q Table")
            print(f"{'State':<7}" + "".join(f"{a:<8}" for a in action_names))
            for s in range(n_states):
                print(f"{s:<7}" + "".join(f"{Q[s, a]:<8.2f}" for a in range(n_actions)))
            print()

            # STEP 4: Random Episode
            print("STEP 4: Random Episode")
            state, _ = env.reset(seed=3)
            render_grid(state)
            print()
            for t in range(15):
                action = env.action_space.sample()
                state, reward, terminated, truncated, _ = env.step(action)
                print(f"Step {t + 1}: {action_names[action]}, Reward: {reward}")
                render_grid(state)
                if terminated or truncated:
                    # The outcome is decided by the reward alone: any ending
                    # without a positive reward is reported as a hole.
                    print("🎉 GOAL!" if reward > 0 else "💀 HOLE!")
                    break
        finally:
            env.close()
    return buf.getvalue()


# NOTE(review): the description tells the user to click "Submit"; with an
# empty `inputs` list Gradio may label the button differently — confirm
# against the installed Gradio version.
demo = gr.Interface(
    fn=run_demo,
    inputs=[],
    outputs=gr.Textbox(label="Output", lines=40),
    title="Q Learning Part 1: FrozenLake Demo",
    description="Click Submit to run one random episode on the FrozenLake grid.",
)

if __name__ == "__main__":
    demo.launch()