stevafernandes committed on
Commit
79a885a
·
verified ·
1 Parent(s): 2a6e89b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -69
app.py CHANGED
@@ -1,107 +1,56 @@
1
- # Q Learning Part 1: Visual Concept Demo
2
-
3
  import gymnasium as gym
4
  import numpy as np
5
  import gradio as gr
6
  import io
7
  import contextlib
8
 
9
-
10
  def run_demo():
11
  buf = io.StringIO()
12
  with contextlib.redirect_stdout(buf):
13
-
14
- # -----------------------------------------------------
15
- # STEP 1: Create the FrozenLake game
16
- # -----------------------------------------------------
17
  env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False)
18
- n_states = env.observation_space.n # 16
19
- n_actions = env.action_space.n # 4
20
  action_names = ["LEFT", "DOWN", "RIGHT", "UP"]
21
-
22
- print("=" * 45)
23
- print(" STEP 1: The FrozenLake Game")
24
- print("=" * 45)
25
- print(f"States : {n_states}")
26
- print(f"Actions: {n_actions} ({', '.join(action_names)})")
27
- print()
28
-
29
-
30
- # -----------------------------------------------------
31
- # STEP 2: Draw the grid with emoji
32
- # -----------------------------------------------------
33
- grid_map = [
34
- ['S', 'F', 'F', 'F'],
35
- ['F', 'H', 'F', 'H'],
36
- ['F', 'F', 'F', 'F'],
37
- ['F', 'H', 'F', 'G'],
38
- ]
39
-
40
  tile_emoji = {'S': '🏁', 'F': '🧊', 'H': '🕳️', 'G': '🎯'}
41
 
42
- def render_grid(agent_state=None):
43
  for r, row in enumerate(grid_map):
44
- line = ""
45
- for c, tile in enumerate(row):
46
- s = r * 4 + c
47
- line += " 🤖 " if s == agent_state else f" {tile_emoji[tile]} "
48
- print(line)
49
 
50
- print("=" * 45)
51
- print(" STEP 2: The Grid")
52
- print("=" * 45)
53
- render_grid()
54
- print("\nLegend: 🏁 Start 🧊 Frozen 🕳️ Hole 🎯 Goal 🤖 Agent")
55
- print()
56
 
 
 
 
 
57
 
58
- # -----------------------------------------------------
59
- # STEP 3: Build the empty Q table
60
- # -----------------------------------------------------
61
  Q = np.zeros((n_states, n_actions))
62
-
63
- print("=" * 45)
64
- print(" STEP 3: The Q Table (starts empty)")
65
- print("=" * 45)
66
  print(f"{'State':<7}" + "".join(f"{a:<8}" for a in action_names))
67
- print("=" * 45)
68
  for s in range(n_states):
69
  print(f"{s:<7}" + "".join(f"{Q[s, a]:<8.2f}" for a in range(n_actions)))
70
  print()
71
 
72
-
73
- # -----------------------------------------------------
74
- # STEP 4: Play one random episode (the RL cycle)
75
- # -----------------------------------------------------
76
- print("=" * 45)
77
- print(" STEP 4: One Random Episode")
78
- print("=" * 45)
79
-
80
  state, _ = env.reset(seed=3)
81
- print("Starting state:")
82
  render_grid(state)
83
  print()
84
 
85
  for t in range(15):
86
  action = env.action_space.sample()
87
- next_state, reward, terminated, truncated, _ = env.step(action)
88
-
89
- print(f"--- Step {t + 1} ---")
90
- print(f"Action: {action_names[action]} Reward: {reward}")
91
- render_grid(next_state)
92
- print()
93
-
94
- state = next_state
95
  if terminated or truncated:
96
- outcome = "🎉 REACHED THE GOAL!" if reward > 0 else "💀 FELL IN A HOLE!"
97
- print(outcome)
98
  break
99
 
100
  env.close()
101
-
102
  return buf.getvalue()
103
 
104
-
105
  demo = gr.Interface(
106
  fn=run_demo,
107
  inputs=[],
 
 
 
1
  import gymnasium as gym
2
  import numpy as np
3
  import gradio as gr
4
  import io
5
  import contextlib
6
 
 
7
  def run_demo():
8
  buf = io.StringIO()
9
  with contextlib.redirect_stdout(buf):
 
 
 
 
10
  env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False)
11
+ n_states, n_actions = env.observation_space.n, env.action_space.n
 
12
  action_names = ["LEFT", "DOWN", "RIGHT", "UP"]
13
+ grid_map = ['SFFF', 'FHFH', 'FFFF', 'FHFG']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  tile_emoji = {'S': '🏁', 'F': '🧊', 'H': '🕳️', 'G': '🎯'}
15
 
16
+ def render_grid(state=None):
17
  for r, row in enumerate(grid_map):
18
+ print("".join(" 🤖 " if r * 4 + c == state else f" {tile_emoji[t]} " for c, t in enumerate(row)))
 
 
 
 
19
 
20
+ # STEP 1: The Game
21
+ print(f"STEP 1: FrozenLake\nStates: {n_states}, Actions: {n_actions} ({', '.join(action_names)})\n")
 
 
 
 
22
 
23
+ # STEP 2: The Grid
24
+ print("STEP 2: The Grid")
25
+ render_grid()
26
+ print("Legend: 🏁 Start 🧊 Frozen 🕳️ Hole 🎯 Goal 🤖 Agent\n")
27
 
28
+ # STEP 3: The Q Table
 
 
29
  Q = np.zeros((n_states, n_actions))
30
+ print("STEP 3: Q Table")
 
 
 
31
  print(f"{'State':<7}" + "".join(f"{a:<8}" for a in action_names))
 
32
  for s in range(n_states):
33
  print(f"{s:<7}" + "".join(f"{Q[s, a]:<8.2f}" for a in range(n_actions)))
34
  print()
35
 
36
+ # STEP 4: Random Episode
37
+ print("STEP 4: Random Episode")
 
 
 
 
 
 
38
  state, _ = env.reset(seed=3)
 
39
  render_grid(state)
40
  print()
41
 
42
  for t in range(15):
43
  action = env.action_space.sample()
44
+ state, reward, terminated, truncated, _ = env.step(action)
45
+ print(f"Step {t + 1}: {action_names[action]}, Reward: {reward}")
46
+ render_grid(state)
 
 
 
 
 
47
  if terminated or truncated:
48
+ print("🎉 GOAL!" if reward > 0 else "💀 HOLE!")
 
49
  break
50
 
51
  env.close()
 
52
  return buf.getvalue()
53
 
 
54
  demo = gr.Interface(
55
  fn=run_demo,
56
  inputs=[],