Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -182,36 +182,53 @@ print("Q-Learning model trained!")
|
|
| 182 |
|
| 183 |
def q_learning_path(q_values, start, goal, max_steps=100):
|
| 184 |
"""Find path using trained Q-Learning model"""
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
| 187 |
|
| 188 |
-
for
|
| 189 |
-
|
| 190 |
-
|
| 191 |
|
| 192 |
-
|
| 193 |
-
break
|
| 194 |
-
|
| 195 |
-
# Get best action
|
| 196 |
action = np.argmax(q_values[y, x])
|
| 197 |
|
| 198 |
-
# Apply action
|
|
|
|
| 199 |
if action == 0 and y > 0: # up
|
| 200 |
-
|
| 201 |
elif action == 1 and x < COLS - 1: # right
|
| 202 |
-
|
| 203 |
elif action == 2 and y < ROWS - 1: # down
|
| 204 |
-
|
| 205 |
elif action == 3 and x > 0: # left
|
| 206 |
-
|
| 207 |
|
| 208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
|
| 214 |
-
|
|
|
|
| 215 |
|
| 216 |
def visualize_path(start_x, start_y, goal_x, goal_y, algorithm):
|
| 217 |
start = (int(start_x), int(start_y))
|
|
|
|
| 182 |
|
| 183 |
def q_learning_path(q_values, start, goal, max_steps=100):
    """Greedily walk the trained Q-table from start toward goal.

    Follows the arg-max action at each cell; if the greedy move would
    revisit an already-seen cell (a loop), the next-best action is tried
    once as a fallback before committing the step.

    Args:
        q_values: numpy array indexed as q_values[y, x, action] with
            actions 0=up, 1=right, 2=down, 3=left.
        start: (x, y) start cell.
        goal: (x, y) goal cell.
        max_steps: maximum number of moves before giving up.

    Returns:
        List of (x, y) cells from start to the last cell reached
        (including the goal if it was reached), or None if no move was
        ever made within the step budget.
    """
    x, y = start
    path = [start]
    visited = {start}

    for _ in range(max_steps):
        if (x, y) == goal:
            return path

        # Best action according to the Q-table at the current cell.
        action = np.argmax(q_values[y, x])
        new_x, new_y = _apply_action(x, y, action)

        # Loop avoidance: if the greedy move revisits a cell, mask that
        # action out and fall back once to the next-best one.
        if (new_x, new_y) in visited:
            q_vals = q_values[y, x].copy()
            q_vals[action] = -np.inf  # never pick the looping action again
            action = np.argmax(q_vals)
            new_x, new_y = _apply_action(x, y, action)

        x, y = new_x, new_y
        path.append((x, y))
        visited.add((x, y))

    # Step budget exhausted: return the partial path (None if we never moved).
    return path if len(path) > 1 else None


def _apply_action(x, y, action):
    """Return the cell reached by taking `action` from (x, y).

    An out-of-bounds move (blocked by the ROWS x COLS grid edge) leaves
    the position unchanged.
    """
    if action == 0 and y > 0:           # up
        return x, y - 1
    if action == 1 and x < COLS - 1:    # right
        return x + 1, y
    if action == 2 and y < ROWS - 1:    # down
        return x, y + 1
    if action == 3 and x > 0:           # left
        return x - 1, y
    return x, y
|
| 232 |
|
| 233 |
def visualize_path(start_x, start_y, goal_x, goal_y, algorithm):
|
| 234 |
start = (int(start_x), int(start_y))
|