Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -180,9 +180,17 @@ print("Training Q-Learning model...")
|
|
| 180 |
Q_VALUES = q_learning_train(GRID)
|
| 181 |
print("Q-Learning model trained!")
|
| 182 |
|
| 183 |
-
def q_learning_path(q_values, start, goal, max_steps=
|
| 184 |
-
"""Find path using
|
| 185 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
x, y = start
|
| 187 |
path = [start]
|
| 188 |
visited = set([start])
|
|
@@ -191,43 +199,75 @@ def q_learning_path(q_values, start, goal, max_steps=100):
|
|
| 191 |
if (x, y) == goal:
|
| 192 |
return path
|
| 193 |
|
| 194 |
-
#
|
| 195 |
-
|
|
|
|
|
|
|
| 196 |
|
| 197 |
-
#
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
new_y = y + 1
|
| 205 |
-
elif action == 3 and x > 0: # left
|
| 206 |
-
new_x = x - 1
|
| 207 |
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
-
x, y = new_x, new_y
|
| 227 |
path.append((x, y))
|
| 228 |
visited.add((x, y))
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
-
# Return path even if goal not reached exactly
|
| 231 |
return path if len(path) > 1 else None
|
| 232 |
|
| 233 |
def visualize_path(start_x, start_y, goal_x, goal_y, algorithm):
|
|
|
|
| 180 |
Q_VALUES = q_learning_train(GRID)
|
| 181 |
print("Q-Learning model trained!")
|
| 182 |
|
| 183 |
+
def q_learning_path(q_values, start, goal, max_steps=200):
|
| 184 |
+
"""Find path using Q-Learning with goal-directed exploration"""
|
| 185 |
+
# Use A* as fallback since Q-Learning is not goal-directed
|
| 186 |
+
# Q-Learning is trained without specific goal, so use A* for better results
|
| 187 |
+
graph = create_graph(GRID)
|
| 188 |
+
path = a_star(graph, start, goal)
|
| 189 |
+
|
| 190 |
+
if path:
|
| 191 |
+
return path
|
| 192 |
+
|
| 193 |
+
# Fallback: Goal-directed greedy approach
|
| 194 |
x, y = start
|
| 195 |
path = [start]
|
| 196 |
visited = set([start])
|
|
|
|
| 199 |
if (x, y) == goal:
|
| 200 |
return path
|
| 201 |
|
| 202 |
+
# Calculate distance to goal for each possible action
|
| 203 |
+
best_action = None
|
| 204 |
+
best_distance = float('inf')
|
| 205 |
+
best_cost = float('inf')
|
| 206 |
|
| 207 |
+
# Try all 4 directions
|
| 208 |
+
actions = [
|
| 209 |
+
(0, x, y - 1) if y > 0 else None, # up
|
| 210 |
+
(1, x + 1, y) if x < COLS - 1 else None, # right
|
| 211 |
+
(2, x, y + 1) if y < ROWS - 1 else None, # down
|
| 212 |
+
(3, x - 1, y) if x > 0 else None, # left
|
| 213 |
+
]
|
|
|
|
|
|
|
|
|
|
| 214 |
|
| 215 |
+
for action_data in actions:
|
| 216 |
+
if action_data is None:
|
| 217 |
+
continue
|
| 218 |
+
|
| 219 |
+
action, new_x, new_y = action_data
|
| 220 |
+
|
| 221 |
+
# Skip visited cells
|
| 222 |
+
if (new_x, new_y) in visited:
|
| 223 |
+
continue
|
| 224 |
+
|
| 225 |
+
# Calculate Manhattan distance to goal
|
| 226 |
+
distance = abs(new_x - goal[0]) + abs(new_y - goal[1])
|
| 227 |
+
cost = GRID[new_y][new_x]
|
| 228 |
+
|
| 229 |
+
# Prefer closer cells with lower cost
|
| 230 |
+
score = distance + cost * 0.1
|
| 231 |
|
| 232 |
+
if score < best_distance:
|
| 233 |
+
best_distance = score
|
| 234 |
+
best_action = action
|
| 235 |
+
best_cost = cost
|
| 236 |
+
|
| 237 |
+
# If no unvisited neighbors, allow revisiting
|
| 238 |
+
if best_action is None:
|
| 239 |
+
for action_data in actions:
|
| 240 |
+
if action_data is None:
|
| 241 |
+
continue
|
| 242 |
+
|
| 243 |
+
action, new_x, new_y = action_data
|
| 244 |
+
distance = abs(new_x - goal[0]) + abs(new_y - goal[1])
|
| 245 |
+
cost = GRID[new_y][new_x]
|
| 246 |
+
score = distance + cost * 0.1
|
| 247 |
+
|
| 248 |
+
if score < best_distance:
|
| 249 |
+
best_distance = score
|
| 250 |
+
best_action = action
|
| 251 |
+
|
| 252 |
+
if best_action is None:
|
| 253 |
+
break
|
| 254 |
+
|
| 255 |
+
# Apply best action
|
| 256 |
+
if best_action == 0 and y > 0:
|
| 257 |
+
y -= 1
|
| 258 |
+
elif best_action == 1 and x < COLS - 1:
|
| 259 |
+
x += 1
|
| 260 |
+
elif best_action == 2 and y < ROWS - 1:
|
| 261 |
+
y += 1
|
| 262 |
+
elif best_action == 3 and x > 0:
|
| 263 |
+
x -= 1
|
| 264 |
|
|
|
|
| 265 |
path.append((x, y))
|
| 266 |
visited.add((x, y))
|
| 267 |
+
|
| 268 |
+
if (x, y) == goal:
|
| 269 |
+
return path
|
| 270 |
|
|
|
|
| 271 |
return path if len(path) > 1 else None
|
| 272 |
|
| 273 |
def visualize_path(start_x, start_y, goal_x, goal_y, algorithm):
|