File size: 2,201 Bytes
55d9b20
 
 
 
 
 
 
 
 
 
79a885a
55d9b20
79a885a
55d9b20
 
79a885a
55d9b20
79a885a
55d9b20
79a885a
 
55d9b20
79a885a
 
 
 
55d9b20
79a885a
55d9b20
79a885a
55d9b20
 
 
 
 
79a885a
 
55d9b20
 
 
 
 
 
79a885a
 
 
55d9b20
79a885a
55d9b20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import gymnasium as gym
import numpy as np
import gradio as gr
import io
import contextlib

def run_demo():
    """Play one random FrozenLake episode and return the printed transcript.

    The transcript has four parts: the environment's state/action counts,
    the grid with a legend, an all-zero Q table, and up to 15 randomly
    sampled moves starting from ``env.reset(seed=3)``.

    Returns:
        str: The full transcript text.
    """
    action_names = ["LEFT", "DOWN", "RIGHT", "UP"]
    tile_emoji = {'S': '🏁', 'F': '🧊', 'H': '🕳️', 'G': '🎯'}
    agent_emoji = '🤖'

    buf = io.StringIO()

    def emit(text=""):
        # Write directly into the buffer instead of redirecting sys.stdout:
        # redirect_stdout swaps a process-wide global, so concurrent requests
        # would leak output into each other's transcripts.
        print(text, file=buf)

    # closing() guarantees env.close() runs even if a step raises.
    with contextlib.closing(
        gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False)
    ) as env:
        n_states, n_actions = env.observation_space.n, env.action_space.n

        # Take the layout from the environment itself so the rendered holes
        # and goal are exactly the ones the agent can step on.
        grid_map = [
            "".join(cell.decode("ascii") for cell in row)
            for row in env.unwrapped.desc
        ]
        n_cols = len(grid_map[0])

        def render_grid(state=None):
            # States are numbered row-major, so (r, c) maps to r * n_cols + c.
            for r, row in enumerate(grid_map):
                emit("".join(
                    f" {agent_emoji} " if r * n_cols + c == state
                    else f" {tile_emoji[t]} "
                    for c, t in enumerate(row)
                ))

        # STEP 1: The Game
        emit(f"STEP 1: FrozenLake\nStates: {n_states}, Actions: {n_actions} ({', '.join(action_names)})\n")

        # STEP 2: The Grid
        emit("STEP 2: The Grid")
        render_grid()
        emit("Legend: 🏁 Start  🧊 Frozen  🕳️ Hole  🎯 Goal  🤖 Agent\n")

        # STEP 3: The Q Table
        Q = np.zeros((n_states, n_actions))
        emit("STEP 3: Q Table")
        emit(f"{'State':<7}" + "".join(f"{a:<8}" for a in action_names))
        for s in range(n_states):
            emit(f"{s:<7}" + "".join(f"{Q[s, a]:<8.2f}" for a in range(n_actions)))
        emit()

        # STEP 4: Random Episode
        emit("STEP 4: Random Episode")
        state, _ = env.reset(seed=3)
        render_grid(state)
        emit()

        for t in range(15):
            # Actions are sampled without a fixed seed, so every run differs.
            action = env.action_space.sample()
            state, reward, terminated, truncated, _ = env.step(action)
            emit(f"Step {t + 1}: {action_names[action]}, Reward: {reward}")
            render_grid(state)
            if terminated or truncated:
                emit("🎉 GOAL!" if reward > 0 else "💀 HOLE!")
                break

    return buf.getvalue()

# Text area that displays the transcript returned by run_demo.
_transcript_box = gr.Textbox(lines=40, label="Output")

# run_demo takes no arguments, so the interface is built with an empty
# input list and a single text output.
# NOTE(review): with no inputs Gradio may label the button differently from
# "Submit" — confirm the description still matches the rendered UI.
demo = gr.Interface(
    title="Q Learning Part 1: FrozenLake Demo",
    description="Click Submit to run one random episode on the FrozenLake grid.",
    fn=run_demo,
    inputs=[],
    outputs=_transcript_box,
)

# Launch the app only when this file is executed directly, not on import.
if __name__ == "__main__":
    demo.launch()