Spaces:

stevafernandes
/

RL

Sleeping

App Files Community

stevafernandes committed on Apr 20

Commit

79a885a

verified ·

1 Parent(s): 2a6e89b

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -69

app.py CHANGED Viewed

@@ -1,107 +1,56 @@
-# Q Learning Part 1: Visual Concept Demo
 import gymnasium as gym
 import numpy as np
 import gradio as gr
 import io
 import contextlib
 def run_demo():
     buf = io.StringIO()
     with contextlib.redirect_stdout(buf):
-        # -----------------------------------------------------
-        # STEP 1: Create the FrozenLake game
-        # -----------------------------------------------------
         env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False)
-        n_states = env.observation_space.n   # 16
-        n_actions = env.action_space.n       # 4
         action_names = ["LEFT", "DOWN", "RIGHT", "UP"]
-        print("=" * 45)
-        print("  STEP 1: The FrozenLake Game")
-        print("=" * 45)
-        print(f"States : {n_states}")
-        print(f"Actions: {n_actions}  ({', '.join(action_names)})")
-        print()
-        # -----------------------------------------------------
-        # STEP 2: Draw the grid with emoji
-        # -----------------------------------------------------
-        grid_map = [
-            ['S', 'F', 'F', 'F'],
-            ['F', 'H', 'F', 'H'],
-            ['F', 'F', 'F', 'F'],
-            ['F', 'H', 'F', 'G'],
-        ]
         tile_emoji = {'S': '🏁', 'F': '🧊', 'H': '🕳️', 'G': '🎯'}
-        def render_grid(agent_state=None):
             for r, row in enumerate(grid_map):
-                line = ""
-                for c, tile in enumerate(row):
-                    s = r * 4 + c
-                    line += " 🤖 " if s == agent_state else f" {tile_emoji[tile]} "
-                print(line)
-        print("=" * 45)
-        print("  STEP 2: The Grid")
-        print("=" * 45)
-        render_grid()
-        print("\nLegend:  🏁 Start   🧊 Frozen   🕳️ Hole   🎯 Goal   🤖 Agent")
-        print()
-        # -----------------------------------------------------
-        # STEP 3: Build the empty Q table
-        # -----------------------------------------------------
         Q = np.zeros((n_states, n_actions))
-        print("=" * 45)
-        print("  STEP 3: The Q Table (starts empty)")
-        print("=" * 45)
         print(f"{'State':<7}" + "".join(f"{a:<8}" for a in action_names))
-        print("=" * 45)
         for s in range(n_states):
             print(f"{s:<7}" + "".join(f"{Q[s, a]:<8.2f}" for a in range(n_actions)))
         print()
-        # -----------------------------------------------------
-        # STEP 4: Play one random episode (the RL cycle)
-        # -----------------------------------------------------
-        print("=" * 45)
-        print("  STEP 4: One Random Episode")
-        print("=" * 45)
         state, _ = env.reset(seed=3)
-        print("Starting state:")
         render_grid(state)
         print()
         for t in range(15):
             action = env.action_space.sample()
-            next_state, reward, terminated, truncated, _ = env.step(action)
-            print(f"--- Step {t + 1} ---")
-            print(f"Action: {action_names[action]}   Reward: {reward}")
-            render_grid(next_state)
-            print()
-            state = next_state
             if terminated or truncated:
-                outcome = "🎉 REACHED THE GOAL!" if reward > 0 else "💀 FELL IN A HOLE!"
-                print(outcome)
                 break
         env.close()
     return buf.getvalue()
 demo = gr.Interface(
     fn=run_demo,
     inputs=[],

 import gymnasium as gym
 import numpy as np
 import gradio as gr
 import io
 import contextlib
 def run_demo():
     buf = io.StringIO()
     with contextlib.redirect_stdout(buf):
         env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False)
+        n_states, n_actions = env.observation_space.n, env.action_space.n
         action_names = ["LEFT", "DOWN", "RIGHT", "UP"]
+        grid_map = ['SFFF', 'FHFH', 'FFFF', 'FHFG']
         tile_emoji = {'S': '🏁', 'F': '🧊', 'H': '🕳️', 'G': '🎯'}
+        def render_grid(state=None):
             for r, row in enumerate(grid_map):
+                print("".join(" 🤖 " if r * 4 + c == state else f" {tile_emoji[t]} " for c, t in enumerate(row)))
+        # STEP 1: The Game
+        print(f"STEP 1: FrozenLake\nStates: {n_states}, Actions: {n_actions} ({', '.join(action_names)})\n")
+        # STEP 2: The Grid
+        print("STEP 2: The Grid")
+        render_grid()
+        print("Legend: 🏁 Start  🧊 Frozen  🕳️ Hole  🎯 Goal  🤖 Agent\n")
+        # STEP 3: The Q Table
         Q = np.zeros((n_states, n_actions))
+        print("STEP 3: Q Table")
         print(f"{'State':<7}" + "".join(f"{a:<8}" for a in action_names))
         for s in range(n_states):
             print(f"{s:<7}" + "".join(f"{Q[s, a]:<8.2f}" for a in range(n_actions)))
         print()
+        # STEP 4: Random Episode
+        print("STEP 4: Random Episode")
         state, _ = env.reset(seed=3)
         render_grid(state)
         print()
         for t in range(15):
             action = env.action_space.sample()
+            state, reward, terminated, truncated, _ = env.step(action)
+            print(f"Step {t + 1}: {action_names[action]}, Reward: {reward}")
+            render_grid(state)
             if terminated or truncated:
+                print("🎉 GOAL!" if reward > 0 else "💀 HOLE!")
                 break
         env.close()
     return buf.getvalue()
 demo = gr.Interface(
     fn=run_demo,
     inputs=[],