files
Browse files- app.py +223 -0
- requirements.txt +8 -0
app.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import importlib.util
|
| 3 |
+
import time
|
| 4 |
+
import random
|
| 5 |
+
from collections import defaultdict
|
| 6 |
+
|
| 7 |
+
# Mapping for the Agent's "Eyes" (Radar)
|
| 8 |
+
# Integer codes for the agent's "eyes": each of the four adjacent cells is
# reported to the agent as one of these values. Order is significant — the
# enumeration position IS the code the agent receives.
RADAR_ENCODING = {
    name: code
    for code, name in enumerate(
        ("EMPTY", "WALL", "GOAL", "ICE", "MUD", "DANGER", "CHARGER", "ENEMY")
    )
}
|
| 18 |
+
|
| 19 |
+
class MegaWorldEnv:
    """A 20x20 grid world with walls, hazards, chargers and random-walking enemies.

    Coordinates are ``(x, y)`` tuples; the renderer draws y=19 at the top, so the
    origin is the bottom-left corner. The agent starts at (1, 1) and must reach
    the goal at (18, 18).
    """

    def __init__(self):
        self.start = (1, 1)
        self.goal = (18, 18)

        # 1. Static map. All hazard collections are sets: get_radar() and the
        # simulation loop test membership every step, and render() tests it for
        # every one of the 400 cells per frame — O(1) lookups matter here.
        self.walls = self._generate_walls()

        # 2. Hazards: two ice columns (x=5 and x=15) and one mud row (y=10).
        # NOTE(review): ice is reported on radar and rendered, but no slip
        # physics exists anywhere in this file — confirm that is intended.
        self.ice = {(5, y) for y in range(5, 15)} | {(15, y) for y in range(5, 15)}
        self.mud = {(x, 10) for x in range(2, 18)}

        # Traps at fixed positions. The original code shuffled this list under
        # a "randomized locations" comment, but shuffling only reordered the
        # elements — membership tests were unaffected, so the no-op is removed.
        self.traps = {(3, 3), (8, 8), (12, 12), (17, 17), (9, 10), (11, 10)}

        # Battery recharge pads.
        self.chargers = {(18, 2), (10, 10)}

        # 3. Enemies: pure random walkers, so only their positions are tracked.
        self.enemies = [
            {"pos": [5, 5]},
            {"pos": [15, 5]},
            {"pos": [12, 12]},
            {"pos": [16, 16]},
            {"pos": [8, 14]},  # one extra for fun
        ]

    def _generate_walls(self):
        """Build the wall set: vertical segments in every 4th column plus four extras."""
        walls = {(i, j) for i in range(20) if i % 4 == 0 for j in range(5, 15)}
        walls |= {(6, 6), (7, 7), (13, 13), (14, 14)}
        return walls

    def shaped_reward(self, old_pos, new_pos):
        """Return a dense guidance reward: +3 per unit of Manhattan distance
        gained toward the goal, negative when moving away."""
        old_d = abs(old_pos[0] - self.goal[0]) + abs(old_pos[1] - self.goal[1])
        new_d = abs(new_pos[0] - self.goal[0]) + abs(new_pos[1] - self.goal[1])
        return 3.0 * (old_d - new_d)

    def get_radar(self, pos):
        """Return {direction: RADAR_ENCODING code} for the 4 cells adjacent to *pos*.

        Precedence: out-of-bounds/walls first, then goal, ice, mud, traps,
        chargers. An enemy standing on a cell overrides whatever else is there.
        """
        x, y = pos
        radar = {}
        dirs = {"up": (x, y + 1), "down": (x, y - 1), "left": (x - 1, y), "right": (x + 1, y)}

        for d, (nx, ny) in dirs.items():
            info = "EMPTY"
            if not (0 <= nx < 20 and 0 <= ny < 20): info = "WALL"
            elif (nx, ny) in self.walls: info = "WALL"
            elif (nx, ny) == self.goal: info = "GOAL"
            elif (nx, ny) in self.ice: info = "ICE"
            elif (nx, ny) in self.mud: info = "MUD"
            elif (nx, ny) in self.traps: info = "DANGER"
            elif (nx, ny) in self.chargers: info = "CHARGER"

            # Enemies are checked last so they mask the terrain beneath them.
            for e in self.enemies:
                if tuple(e["pos"]) == (nx, ny):
                    info = "ENEMY"

            radar[d] = RADAR_ENCODING[info]
        return radar

    def update_enemies(self, player_pos):
        """Advance every enemy one step of a random walk.

        Enemies move to a uniformly random in-bounds, non-wall neighbor.
        *player_pos* is currently unused (enemies do not chase); it is kept in
        the signature for interface compatibility with callers.
        """
        for e in self.enemies:
            x, y = e["pos"]
            possible_moves = []

            # Candidate cells: up, down, left, right.
            candidates = [(x, y + 1), (x, y - 1), (x - 1, y), (x + 1, y)]

            for nx, ny in candidates:
                # Enemies never walk into walls or off the map.
                if 0 <= nx < 20 and 0 <= ny < 20 and (nx, ny) not in self.walls:
                    possible_moves.append((nx, ny))

            # A surrounded enemy (no legal move) simply stays put.
            if possible_moves:
                e["pos"] = list(random.choice(possible_moves))

    def render(self, player_pos, history, battery, score):
        """Render the board as an HTML grid string (20x20, top row is y=19).

        *history* is the trail of visited cells; it only tints the background,
        and any hazard/entity drawn later overwrites both color and glyph.
        """
        html = "<div style='background:#000;padding:10px;border-radius:12px; font-family: monospace;'>"
        html += f"<div style='color:white; margin-bottom: 5px;'>π {battery}% | π {score:.1f}</div>"
        html += "<div style='display:grid;grid-template-columns:repeat(20,22px);gap:1px'>"

        enemy_pos = [tuple(e["pos"]) for e in self.enemies]

        for y in range(19, -1, -1):
            for x in range(20):
                pos = (x, y)
                color = "#111"; char = ""

                # Terrain layer (later checks override earlier ones).
                if pos in self.walls: color = "#555"
                elif pos in self.ice: color = "#29b6f6"
                elif pos in self.mud: color = "#4e342e"
                elif pos in history: color = "#263238"

                # Entity layer.
                if pos == self.goal: char = "π"; color = "#4caf50"
                if pos in self.chargers: char = "β‘"; color = "#fdd835"
                if pos in enemy_pos: char = "πΎ"; color = "#d500f9"  # Ghost icon

                if pos == player_pos:
                    char = "π€"
                    # Agent turns orange as a low-battery warning.
                    color = "#2196f3" if battery > 20 else "#ff6f00"

                html += f"<div style='width:22px;height:22px;background:{color};display:flex;align-items:center;justify-content:center;color:white;'>{char}</div>"

        html += "</div></div>"
        return html
|
| 141 |
+
|
| 142 |
+
def run_mega_simulation(file):
    """Load an uploaded agent module and stream the simulation to the UI.

    Yields ``(board_html, stats_dict)`` tuples so Gradio can live-update the
    two outputs. The uploaded file must define
    ``get_action(pos, radar, battery) -> int`` returning 0..3
    (up/down/left/right) and may optionally define
    ``observe(reward, pos, radar, battery, done)`` for learning agents.
    """
    env = MegaWorldEnv()
    if file is None:
        # Nothing uploaded yet: show the initial board and stop.
        yield env.render(env.start, [], 100, 0), {}
        return

    # SECURITY: this executes arbitrary uploaded Python with full privileges.
    # Acceptable for a trusted local demo; do NOT expose publicly without
    # sandboxing the agent process.
    spec = importlib.util.spec_from_file_location("agent", file.name)
    agent = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(agent)

    pos = list(env.start)
    battery = 100
    score = 0
    history = []

    for step in range(300):
        # 1. AI decision.
        radar = env.get_radar(pos)
        try:
            action = agent.get_action(pos[:], radar, battery)
        except Exception:
            # Agent crashed — end the run. (Was a bare `except:`, which also
            # swallowed KeyboardInterrupt/SystemExit.)
            break
        if action not in (0, 1, 2, 3):
            break  # invalid action index — previously raised and was swallowed

        # 2. Movement physics.
        dx, dy = [(0, 1), (0, -1), (-1, 0), (1, 0)][action]
        prev_pos = pos[:]
        nx, ny = pos[0] + dx, pos[1] + dy

        # Walls/bounds: bumping leaves the agent in place.
        if not (0 <= nx < 20 and 0 <= ny < 20) or (nx, ny) in env.walls:
            nx, ny = pos
        pos = [nx, ny]

        # 3. Environment updates (enemies random-walk every step).
        env.update_enemies(pos)
        history.append(tuple(pos))

        # 4. Scoring & battery.
        battery -= 1
        if tuple(pos) in env.mud: battery -= 5
        # FIX: chargers were rendered and reported on radar but had no effect.
        # Standing on one now tops the battery up (capped at 100).
        if tuple(pos) in env.chargers: battery = min(100, battery + 25)

        reward = env.shaped_reward(tuple(prev_pos), tuple(pos))

        if prev_pos == pos: reward -= 5  # penalty for standing still (wall bump)
        if tuple(pos) in env.traps:
            reward -= 10; battery -= 10

        done = False

        # Death: battery exhausted or collision with any enemy.
        if battery <= 0 or tuple(pos) in [tuple(e["pos"]) for e in env.enemies]:
            reward -= 20; done = True

        if tuple(pos) == env.goal:
            reward += 1000; done = True

        # Floor the per-step reward so one bad step can't tank the score.
        reward = max(reward, -10)
        score += reward

        # 5. Optional RL observation hook for learning agents.
        if hasattr(agent, "observe"):
            agent.observe(reward, pos, radar, battery, done)

        yield env.render(tuple(pos), history, battery, score), {"step": step, "reward": round(reward, 2)}

        if done: return
        time.sleep(0.05)
|
| 209 |
+
|
| 210 |
+
# --- GRADIO LAUNCH ---
# UI layout: board on the left, upload/run/stats column on the right.
with gr.Blocks() as demo:
    gr.Markdown("# π Super RL World: Random Chaos Edition")
    with gr.Row():
        game = gr.HTML(MegaWorldEnv().render((1, 1), [], 100, 0))
        with gr.Column():
            file = gr.File(label="Upload agent.py")
            btn = gr.Button("π Run Simulation")
            log = gr.JSON(label="Live Stats")

    # run_mega_simulation is a generator, so each yield streams a fresh
    # (board, stats) pair into the two outputs.
    btn.click(run_mega_simulation, file, [game, log])

# Guard the launch so importing this module (e.g. from tests or tooling)
# does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
gymnasium==0.29.1
|
| 3 |
+
stable-baselines3==2.2.1
|
| 4 |
+
shimmy>=1.3.0
|
| 5 |
+
numpy
|
| 6 |
+
torch
|
| 7 |
+
opencv-python-headless
|
| 8 |
+
pillow
|