Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,107 +1,56 @@
|
|
| 1 |
-
# Q Learning Part 1: Visual Concept Demo
|
| 2 |
-
|
| 3 |
import gymnasium as gym
|
| 4 |
import numpy as np
|
| 5 |
import gradio as gr
|
| 6 |
import io
|
| 7 |
import contextlib
|
| 8 |
|
| 9 |
-
|
| 10 |
def run_demo():
|
| 11 |
buf = io.StringIO()
|
| 12 |
with contextlib.redirect_stdout(buf):
|
| 13 |
-
|
| 14 |
-
# -----------------------------------------------------
|
| 15 |
-
# STEP 1: Create the FrozenLake game
|
| 16 |
-
# -----------------------------------------------------
|
| 17 |
env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False)
|
| 18 |
-
n_states = env.observation_space.n
|
| 19 |
-
n_actions = env.action_space.n # 4
|
| 20 |
action_names = ["LEFT", "DOWN", "RIGHT", "UP"]
|
| 21 |
-
|
| 22 |
-
print("=" * 45)
|
| 23 |
-
print(" STEP 1: The FrozenLake Game")
|
| 24 |
-
print("=" * 45)
|
| 25 |
-
print(f"States : {n_states}")
|
| 26 |
-
print(f"Actions: {n_actions} ({', '.join(action_names)})")
|
| 27 |
-
print()
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
# -----------------------------------------------------
|
| 31 |
-
# STEP 2: Draw the grid with emoji
|
| 32 |
-
# -----------------------------------------------------
|
| 33 |
-
grid_map = [
|
| 34 |
-
['S', 'F', 'F', 'F'],
|
| 35 |
-
['F', 'H', 'F', 'H'],
|
| 36 |
-
['F', 'F', 'F', 'F'],
|
| 37 |
-
['F', 'H', 'F', 'G'],
|
| 38 |
-
]
|
| 39 |
-
|
| 40 |
tile_emoji = {'S': 'π', 'F': 'π§', 'H': 'π³οΈ', 'G': 'π―'}
|
| 41 |
|
| 42 |
-
def render_grid(
|
| 43 |
for r, row in enumerate(grid_map):
|
| 44 |
-
|
| 45 |
-
for c, tile in enumerate(row):
|
| 46 |
-
s = r * 4 + c
|
| 47 |
-
line += " π€ " if s == agent_state else f" {tile_emoji[tile]} "
|
| 48 |
-
print(line)
|
| 49 |
|
| 50 |
-
|
| 51 |
-
print("
|
| 52 |
-
print("=" * 45)
|
| 53 |
-
render_grid()
|
| 54 |
-
print("\nLegend: π Start π§ Frozen π³οΈ Hole π― Goal π€ Agent")
|
| 55 |
-
print()
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
#
|
| 59 |
-
# STEP 3: Build the empty Q table
|
| 60 |
-
# -----------------------------------------------------
|
| 61 |
Q = np.zeros((n_states, n_actions))
|
| 62 |
-
|
| 63 |
-
print("=" * 45)
|
| 64 |
-
print(" STEP 3: The Q Table (starts empty)")
|
| 65 |
-
print("=" * 45)
|
| 66 |
print(f"{'State':<7}" + "".join(f"{a:<8}" for a in action_names))
|
| 67 |
-
print("=" * 45)
|
| 68 |
for s in range(n_states):
|
| 69 |
print(f"{s:<7}" + "".join(f"{Q[s, a]:<8.2f}" for a in range(n_actions)))
|
| 70 |
print()
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
# STEP 4: Play one random episode (the RL cycle)
|
| 75 |
-
# -----------------------------------------------------
|
| 76 |
-
print("=" * 45)
|
| 77 |
-
print(" STEP 4: One Random Episode")
|
| 78 |
-
print("=" * 45)
|
| 79 |
-
|
| 80 |
state, _ = env.reset(seed=3)
|
| 81 |
-
print("Starting state:")
|
| 82 |
render_grid(state)
|
| 83 |
print()
|
| 84 |
|
| 85 |
for t in range(15):
|
| 86 |
action = env.action_space.sample()
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
print(f"Action: {action_names[action]} Reward: {reward}")
|
| 91 |
-
render_grid(next_state)
|
| 92 |
-
print()
|
| 93 |
-
|
| 94 |
-
state = next_state
|
| 95 |
if terminated or truncated:
|
| 96 |
-
|
| 97 |
-
print(outcome)
|
| 98 |
break
|
| 99 |
|
| 100 |
env.close()
|
| 101 |
-
|
| 102 |
return buf.getvalue()
|
| 103 |
|
| 104 |
-
|
| 105 |
demo = gr.Interface(
|
| 106 |
fn=run_demo,
|
| 107 |
inputs=[],
|
|
|
|
|
|
|
|
|
|
| 1 |
import gymnasium as gym
|
| 2 |
import numpy as np
|
| 3 |
import gradio as gr
|
| 4 |
import io
|
| 5 |
import contextlib
|
| 6 |
|
|
|
|
| 7 |
def run_demo():
    """Run the FrozenLake Q-learning concept demo and return its text output.

    Builds a deterministic (non-slippery) 4x4 FrozenLake environment, then
    prints four sections: the game's state/action counts, the grid, a
    zero-initialised Q table, and one episode of up to 15 randomly sampled
    actions. Everything printed is captured and returned instead of being
    written to the real stdout.

    Returns:
        str: the captured output of all four steps.
    """
    buf = io.StringIO()
    with contextlib.redirect_stdout(buf):
        env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False)
        try:
            n_states, n_actions = env.observation_space.n, env.action_space.n
            action_names = ["LEFT", "DOWN", "RIGHT", "UP"]
            grid_map = ['SFFF', 'FHFH', 'FFFF', 'FHFG']
            n_cols = len(grid_map[0])
            # NOTE(review): the emoji literals in this function look
            # mis-encoded (mojibake) in this copy of the file; they are
            # reproduced unchanged -- confirm against the original app.py.
            tile_emoji = {'S': 'π', 'F': 'π§', 'H': 'π³οΈ', 'G': 'π―'}

            def render_grid(state=None):
                """Print the grid, drawing the agent marker on cell `state`."""
                for r, row in enumerate(grid_map):
                    # Cells are numbered row-major, so cell (r, c) has
                    # index r * n_cols + c.
                    print("".join(
                        " π€ " if r * n_cols + c == state
                        else f" {tile_emoji[tile]} "
                        for c, tile in enumerate(row)
                    ))

            # STEP 1: The Game
            print(f"STEP 1: FrozenLake\nStates: {n_states}, Actions: {n_actions} ({', '.join(action_names)})\n")

            # STEP 2: The Grid
            print("STEP 2: The Grid")
            render_grid()
            print("Legend: π Start π§ Frozen π³οΈ Hole π― Goal π€ Agent\n")

            # STEP 3: The Q Table (one row per state, one column per action)
            Q = np.zeros((n_states, n_actions))
            print("STEP 3: Q Table")
            print(f"{'State':<7}" + "".join(f"{a:<8}" for a in action_names))
            for s in range(n_states):
                print(f"{s:<7}" + "".join(f"{Q[s, a]:<8.2f}" for a in range(n_actions)))
            print()

            # STEP 4: Random Episode
            print("STEP 4: Random Episode")
            state, _ = env.reset(seed=3)
            render_grid(state)
            print()

            for t in range(15):
                action = env.action_space.sample()
                state, reward, terminated, truncated, _ = env.step(action)
                print(f"Step {t + 1}: {action_names[action]}, Reward: {reward}")
                render_grid(state)
                if terminated:
                    # A terminated episode ended on the goal (positive
                    # reward) or in a hole (no reward).
                    print("π GOAL!" if reward > 0 else "π HOLE!")
                    break
                if truncated:
                    # A time-limit cut-off is not a fall into a hole, so it
                    # is reported separately.
                    print("Episode truncated (time limit reached).")
                    break
        finally:
            # Release the environment even if a step above raised.
            env.close()

    return buf.getvalue()
|
| 53 |
|
|
|
|
| 54 |
demo = gr.Interface(
|
| 55 |
fn=run_demo,
|
| 56 |
inputs=[],
|