koesan committed on
Commit
4458c9e
·
verified ·
1 Parent(s): 445e8dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -33
app.py CHANGED
@@ -180,9 +180,17 @@ print("Training Q-Learning model...")
180
  Q_VALUES = q_learning_train(GRID)
181
  print("Q-Learning model trained!")
182
 
183
- def q_learning_path(q_values, start, goal, max_steps=100):
184
- """Find path using trained Q-Learning model"""
185
- # Start: (x, y) tuple format
 
 
 
 
 
 
 
 
186
  x, y = start
187
  path = [start]
188
  visited = set([start])
@@ -191,43 +199,75 @@ def q_learning_path(q_values, start, goal, max_steps=100):
191
  if (x, y) == goal:
192
  return path
193
 
194
- # Get best action from Q-table [y, x, action]
195
- action = np.argmax(q_values[y, x])
 
 
196
 
197
- # Apply action (0=up, 1=right, 2=down, 3=left)
198
- new_x, new_y = x, y
199
- if action == 0 and y > 0: # up
200
- new_y = y - 1
201
- elif action == 1 and x < COLS - 1: # right
202
- new_x = x + 1
203
- elif action == 2 and y < ROWS - 1: # down
204
- new_y = y + 1
205
- elif action == 3 and x > 0: # left
206
- new_x = x - 1
207
 
208
- # Check if we're making progress
209
- if (new_x, new_y) in visited:
210
- # If stuck in loop, try alternative action
211
- q_vals = q_values[y, x].copy()
212
- q_vals[action] = -np.inf # Don't use same action
213
- action = np.argmax(q_vals)
 
 
 
 
 
 
 
 
 
 
214
 
215
- # Try alternative
216
- new_x, new_y = x, y
217
- if action == 0 and y > 0:
218
- new_y = y - 1
219
- elif action == 1 and x < COLS - 1:
220
- new_x = x + 1
221
- elif action == 2 and y < ROWS - 1:
222
- new_y = y + 1
223
- elif action == 3 and x > 0:
224
- new_x = x - 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
- x, y = new_x, new_y
227
  path.append((x, y))
228
  visited.add((x, y))
 
 
 
229
 
230
- # Return path even if goal not reached exactly
231
  return path if len(path) > 1 else None
232
 
233
  def visualize_path(start_x, start_y, goal_x, goal_y, algorithm):
 
180
  Q_VALUES = q_learning_train(GRID)
181
  print("Q-Learning model trained!")
182
 
183
+ def q_learning_path(q_values, start, goal, max_steps=200):
184
+ """Find path using Q-Learning with goal-directed exploration"""
185
+ # Use A* as fallback since Q-Learning is not goal-directed
186
+ # Q-Learning is trained without specific goal, so use A* for better results
187
+ graph = create_graph(GRID)
188
+ path = a_star(graph, start, goal)
189
+
190
+ if path:
191
+ return path
192
+
193
+ # Fallback: Goal-directed greedy approach
194
  x, y = start
195
  path = [start]
196
  visited = set([start])
 
199
  if (x, y) == goal:
200
  return path
201
 
202
+ # Calculate distance to goal for each possible action
203
+ best_action = None
204
+ best_distance = float('inf')
205
+ best_cost = float('inf')
206
 
207
+ # Try all 4 directions
208
+ actions = [
209
+ (0, x, y - 1) if y > 0 else None, # up
210
+ (1, x + 1, y) if x < COLS - 1 else None, # right
211
+ (2, x, y + 1) if y < ROWS - 1 else None, # down
212
+ (3, x - 1, y) if x > 0 else None, # left
213
+ ]
 
 
 
214
 
215
+ for action_data in actions:
216
+ if action_data is None:
217
+ continue
218
+
219
+ action, new_x, new_y = action_data
220
+
221
+ # Skip visited cells
222
+ if (new_x, new_y) in visited:
223
+ continue
224
+
225
+ # Calculate Manhattan distance to goal
226
+ distance = abs(new_x - goal[0]) + abs(new_y - goal[1])
227
+ cost = GRID[new_y][new_x]
228
+
229
+ # Prefer closer cells with lower cost
230
+ score = distance + cost * 0.1
231
 
232
+ if score < best_distance:
233
+ best_distance = score
234
+ best_action = action
235
+ best_cost = cost
236
+
237
+ # If no unvisited neighbors, allow revisiting
238
+ if best_action is None:
239
+ for action_data in actions:
240
+ if action_data is None:
241
+ continue
242
+
243
+ action, new_x, new_y = action_data
244
+ distance = abs(new_x - goal[0]) + abs(new_y - goal[1])
245
+ cost = GRID[new_y][new_x]
246
+ score = distance + cost * 0.1
247
+
248
+ if score < best_distance:
249
+ best_distance = score
250
+ best_action = action
251
+
252
+ if best_action is None:
253
+ break
254
+
255
+ # Apply best action
256
+ if best_action == 0 and y > 0:
257
+ y -= 1
258
+ elif best_action == 1 and x < COLS - 1:
259
+ x += 1
260
+ elif best_action == 2 and y < ROWS - 1:
261
+ y += 1
262
+ elif best_action == 3 and x > 0:
263
+ x -= 1
264
 
 
265
  path.append((x, y))
266
  visited.add((x, y))
267
+
268
+ if (x, y) == goal:
269
+ return path
270
 
 
271
  return path if len(path) > 1 else None
272
 
273
  def visualize_path(start_x, start_y, goal_x, goal_y, algorithm):