Ds0uz4 committed on
Commit
a507130
·
1 Parent(s): 6a5f04a
Files changed (2) hide show
  1. app.py +223 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import importlib.util
3
+ import time
4
+ import random
5
+ from collections import defaultdict
6
+
7
# Mapping for the Agent's "Eyes" (Radar).
# Each perceivable cell type gets a small integer code, assigned in order.
RADAR_ENCODING = {
    name: code
    for code, name in enumerate(
        ["EMPTY", "WALL", "GOAL", "ICE", "MUD", "DANGER", "CHARGER", "ENEMY"]
    )
}
18
+
19
class MegaWorldEnv:
    """A 20x20 grid world with walls, hazards, chargers and wandering enemies.

    Coordinates are (x, y) tuples. render() draws row y=19 first and y=0 last,
    so (0, 0) appears at the bottom-left of the HTML grid.
    """

    def __init__(self):
        self.start = (1, 1)
        self.goal = (18, 18)

        # 1. Generate Map.  frozenset gives O(1) membership tests; render()
        # probes every one of the 400 cells each frame, so this matters.
        self.walls = frozenset(self._generate_walls())

        # 2. Hazards (membership-only collections, so sets are safe here)
        self.ice = frozenset(
            [(5, y) for y in range(5, 15)] + [(15, y) for y in range(5, 15)]
        )
        self.mud = frozenset((x, 10) for x in range(2, 18))

        # Traps.  Kept as a list because random.shuffle needs one; the shuffle
        # never changes membership (only order, which is never read) but is
        # preserved so the RNG stream matches the original implementation.
        self.traps = [(3, 3), (8, 8), (12, 12), (17, 17), (9, 10), (11, 10)]
        random.shuffle(self.traps)

        # Chargers
        self.chargers = [(18, 2), (10, 10)]

        # 3. Enemies do a simple random walk; only positions are tracked.
        # NOTE(review): (12, 12) starts inside a wall column — it escapes on
        # its first update_enemies() call, but the initial frame shows it
        # overlapping a wall.  Left as-is to preserve the original map.
        self.enemies = [
            {"pos": [5, 5]},
            {"pos": [15, 5]},
            {"pos": [12, 12]},
            {"pos": [16, 16]},
            {"pos": [8, 14]},  # Added one more for fun
        ]

    def _generate_walls(self):
        """Return wall coordinates: a vertical segment (y 5..14) in every 4th
        column, plus four scattered single blocks."""
        walls = []
        for i in range(20):
            if i % 4 == 0:
                for j in range(5, 15):
                    walls.append((i, j))
        walls += [(6, 6), (7, 7), (13, 13), (14, 14)]
        return walls

    def shaped_reward(self, old_pos, new_pos):
        """Reward shaping: +3.0 per Manhattan step toward the goal, -3.0 away.

        Guides the agent — moving closer to the goal yields positive reward.
        """
        old_d = abs(old_pos[0] - self.goal[0]) + abs(old_pos[1] - self.goal[1])
        new_d = abs(new_pos[0] - self.goal[0]) + abs(new_pos[1] - self.goal[1])
        return 3.0 * (old_d - new_d)

    def get_radar(self, pos):
        """Return {direction: RADAR_ENCODING code} for the 4 adjacent squares.

        Off-map squares read as WALL.  An enemy standing on a square overrides
        whatever terrain label the square would otherwise carry.
        """
        x, y = pos
        radar = {}
        dirs = {"up": (x, y + 1), "down": (x, y - 1),
                "left": (x - 1, y), "right": (x + 1, y)}

        for d, (nx, ny) in dirs.items():
            info = "EMPTY"
            if not (0 <= nx < 20 and 0 <= ny < 20):
                info = "WALL"  # off-map reads as solid wall
            elif (nx, ny) in self.walls:
                info = "WALL"
            elif (nx, ny) == self.goal:
                info = "GOAL"
            elif (nx, ny) in self.ice:
                info = "ICE"
            elif (nx, ny) in self.mud:
                info = "MUD"
            elif (nx, ny) in self.traps:
                info = "DANGER"
            elif (nx, ny) in self.chargers:
                info = "CHARGER"

            # Enemies take precedence over the terrain underneath them.
            for e in self.enemies:
                if tuple(e["pos"]) == (nx, ny):
                    info = "ENEMY"

            radar[d] = RADAR_ENCODING[info]
        return radar

    def update_enemies(self, player_pos):
        """Random walk: each enemy moves to a uniformly chosen valid neighbour.

        ``player_pos`` is accepted for interface compatibility but is ignored
        by the random-walk behaviour.  Enemies boxed in on all four sides
        (none exist on this map after the first step) simply stay put.
        """
        for e in self.enemies:
            x, y = e["pos"]
            possible_moves = []

            # Check Up, Down, Left, Right
            candidates = [(x, y + 1), (x, y - 1), (x - 1, y), (x + 1, y)]

            for nx, ny in candidates:
                # Ensure they don't walk into walls or off the map
                if 0 <= nx < 20 and 0 <= ny < 20 and (nx, ny) not in self.walls:
                    possible_moves.append((nx, ny))

            # Pick a random move
            if possible_moves:
                e["pos"] = list(random.choice(possible_moves))

    def render(self, player_pos, history, battery, score):
        """Render the board as an HTML CSS-grid string.

        Args:
            player_pos: (x, y) tuple of the robot.
            history: iterable of visited (x, y) tuples, drawn as a faint trail.
            battery: 0-100 charge level; the robot tints orange at <= 20.
            score: running score shown in the header bar.

        Returns:
            A self-contained HTML string (no external CSS/JS).
        """
        html = "<div style='background:#000;padding:10px;border-radius:12px; font-family: monospace;'>"
        html += f"<div style='color:white; margin-bottom: 5px;'>🔋 {battery}% | 🏆 {score:.1f}</div>"
        html += "<div style='display:grid;grid-template-columns:repeat(20,22px);gap:1px'>"

        enemy_pos = [tuple(e["pos"]) for e in self.enemies]
        # Hoisted: `history` is a list that grows every step; a set makes the
        # 400 per-frame membership probes O(1) instead of O(steps).
        trail = set(history)

        for y in range(19, -1, -1):
            for x in range(20):
                pos = (x, y)
                color = "#111"
                char = ""

                if pos in self.walls:
                    color = "#555"
                elif pos in self.ice:
                    color = "#29b6f6"
                elif pos in self.mud:
                    color = "#4e342e"
                elif pos in trail:
                    color = "#263238"

                # Overlays are drawn on top of the terrain colouring.
                if pos == self.goal:
                    char = "🏁"
                    color = "#4caf50"
                if pos in self.chargers:
                    char = "⚡"
                    color = "#fdd835"
                if pos in enemy_pos:
                    char = "👾"  # Ghost icon
                    color = "#d500f9"

                if pos == player_pos:
                    char = "🤖"
                    # Low battery (<= 20%) tints the robot orange as a warning
                    color = "#2196f3" if battery > 20 else "#ff6f00"

                html += f"<div style='width:22px;height:22px;background:{color};display:flex;align-items:center;justify-content:center;color:white;'>{char}</div>"

        html += "</div></div>"
        return html
141
+
142
def run_mega_simulation(file):
    """Load an uploaded agent module and stream simulation frames to Gradio.

    Args:
        file: a Gradio File object (anything with a ``.name`` path) pointing
            at a Python module defining ``get_action(pos, radar, battery)``
            returning an int in 0..3, and optionally
            ``observe(reward, pos, radar, battery, done)``.

    Yields:
        (html, stats) tuples: the rendered board and a small per-step dict.
    """
    env = MegaWorldEnv()
    if file is None:
        # No upload yet: show the empty board once and stop.
        yield env.render(env.start, [], 100, 0), {}
        return

    # SECURITY NOTE(review): this executes arbitrary uploaded Python code
    # in-process.  Acceptable for a sandboxed demo Space, never on a host
    # with anything worth protecting.
    spec = importlib.util.spec_from_file_location("agent", file.name)
    agent = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(agent)

    pos = list(env.start)
    battery = 100
    score = 0
    history = []

    # Action index -> (dx, dy): 0=up, 1=down, 2=left, 3=right
    moves = [(0, 1), (0, -1), (-1, 0), (1, 0)]

    for step in range(300):
        # 1. AI Decision (pos is copied so the agent can't mutate our state)
        radar = env.get_radar(pos)
        try:
            action = agent.get_action(pos[:], radar, battery)
        except Exception:
            # Agent crashed — end this run, don't kill the server.
            # (Was a bare `except:`, which also swallowed SystemExit.)
            break

        # An out-of-range action used to raise an uncaught IndexError and
        # kill the stream (and negative ints silently wrapped around);
        # treat any invalid action the same as an agent crash.
        if action not in (0, 1, 2, 3):
            break

        # 2. Movement Physics
        dx, dy = moves[action]
        prev_pos = pos[:]
        nx, ny = pos[0] + dx, pos[1] + dy

        # Wall/Bounds Check
        if not (0 <= nx < 20 and 0 <= ny < 20) or (nx, ny) in env.walls:
            nx, ny = pos  # Hit wall, stay put
        pos = [nx, ny]

        # 3. Environment Updates
        env.update_enemies(pos)  # Enemies move randomly
        history.append(tuple(pos))

        # 4. Scoring & Battery
        battery -= 1
        if tuple(pos) in env.mud:
            battery -= 5  # mud drains extra charge

        reward = env.shaped_reward(tuple(prev_pos), tuple(pos))

        if prev_pos == pos:
            reward -= 5  # Penalty for standing still (walked into a wall)
        if tuple(pos) in env.traps:
            reward -= 10
            battery -= 10

        done = False

        # Terminal: battery dead, or collision with an enemy
        if battery <= 0 or tuple(pos) in [tuple(e["pos"]) for e in env.enemies]:
            reward -= 20
            done = True

        if tuple(pos) == env.goal:
            reward += 1000
            done = True

        reward = max(reward, -10)  # clamp the worst-case per-step penalty
        score += reward

        # 5. RL Observation Hook (Optional)
        if hasattr(agent, "observe"):
            agent.observe(reward, pos, radar, battery, done)

        yield env.render(tuple(pos), history, battery, score), {"step": step, "reward": round(reward, 2)}

        if done:
            return
        time.sleep(0.05)  # ~20 fps pacing for the live UI
209
+
210
# --- GRADIO LAUNCH ---
# Board on the left; upload / run / stats column on the right.
with gr.Blocks() as demo:
    gr.Markdown("# 🌍 Super RL World: Random Chaos Edition")
    with gr.Row():
        board = gr.HTML(MegaWorldEnv().render((1, 1), [], 100, 0))
        with gr.Column():
            agent_upload = gr.File(label="Upload agent.py")
            run_btn = gr.Button("🚀 Run Simulation")
            stats = gr.JSON(label="Live Stats")

    # run_mega_simulation is a generator, so frames stream into the widgets.
    run_btn.click(run_mega_simulation, agent_upload, [board, stats])

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ gymnasium==0.29.1
3
+ stable-baselines3==2.2.1
4
+ shimmy>=1.3.0
5
+ numpy
6
+ torch
7
+ opencv-python-headless
8
+ pillow