koesan committed on
Commit
445e8dc
·
verified ·
1 Parent(s): a9927b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -19
app.py CHANGED
@@ -182,36 +182,53 @@ print("Q-Learning model trained!")
182
 
183
  def q_learning_path(q_values, start, goal, max_steps=100):
184
  """Find path using trained Q-Learning model"""
185
- current_state = list(start) # [x, y] format from frontend
186
- path = [tuple(current_state)]
 
 
187
 
188
- for _ in range(max_steps):
189
- # Convert to [y, x] for q_values indexing
190
- y, x = current_state[1], current_state[0]
191
 
192
- if tuple(current_state) == goal:
193
- break
194
-
195
- # Get best action
196
  action = np.argmax(q_values[y, x])
197
 
198
- # Apply action
 
199
  if action == 0 and y > 0: # up
200
- current_state[1] -= 1
201
  elif action == 1 and x < COLS - 1: # right
202
- current_state[0] += 1
203
  elif action == 2 and y < ROWS - 1: # down
204
- current_state[1] += 1
205
  elif action == 3 and x > 0: # left
206
- current_state[0] -= 1
207
 
208
- path.append(tuple(current_state))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
- # Prevent infinite loops
211
- if len(path) > max_steps:
212
- break
213
 
214
- return path if path[-1] == goal else None
 
215
 
216
  def visualize_path(start_x, start_y, goal_x, goal_y, algorithm):
217
  start = (int(start_x), int(start_y))
 
182
 
183
def q_learning_path(q_values, start, goal, max_steps=100):
    """Walk the greedy policy of a trained Q-table from ``start`` toward ``goal``.

    At every cell the actions are ranked by Q-value (highest first, ties
    resolved toward the lowest action index, like ``np.argmax``).  The walker
    takes the best-ranked move that stays on the grid and lands on a cell it
    has not visited yet; if every reachable neighbour was already visited it
    takes the best-ranked in-bounds move instead, so it never wastes a step
    standing still against a wall.

    Args:
        q_values: Q-table indexed as ``q_values[y, x, action]`` with actions
            0=up, 1=right, 2=down, 3=left.
        start: ``(x, y)`` start cell (any 2-item sequence).
        goal: ``(x, y)`` goal cell (any 2-item sequence).
        max_steps: Maximum number of moves to attempt.

    Returns:
        A list of ``(x, y)`` tuples beginning with ``start``.  The list ends
        at ``goal`` when it was reached; otherwise it is the partial path
        walked within ``max_steps``.  ``None`` when no move was made at all
        (e.g. ``max_steps <= 0`` or a grid with no legal move).
    """
    # (dx, dy) per action index: 0=up, 1=right, 2=down, 3=left.
    moves = ((0, -1), (1, 0), (0, 1), (-1, 0))

    # Normalise to tuples so list-style [x, y] inputs hash and compare fine.
    start = tuple(start)
    goal = tuple(goal)

    x, y = start
    path = [start]
    visited = {start}

    for _ in range(max_steps):
        if (x, y) == goal:
            return path

        # Rank actions best-first; the stable sort keeps argmax tie-breaking.
        scores = np.asarray(q_values[y, x], dtype=float)[:len(moves)]
        ranked = np.argsort(-scores, kind="stable").tolist()

        next_cell = None
        revisit_cell = None
        for action in ranked:
            dx, dy = moves[action]
            cand = (x + dx, y + dy)
            if not (0 <= cand[0] < COLS and 0 <= cand[1] < ROWS):
                continue  # would leave the grid
            if cand not in visited:
                next_cell = cand
                break
            if revisit_cell is None:
                # Remember the best legal move in case everything is visited.
                revisit_cell = cand

        if next_cell is None:
            next_cell = revisit_cell
        if next_cell is None:
            # No legal move exists from this cell; give up early.
            break

        x, y = next_cell
        path.append(next_cell)
        visited.add(next_cell)

    # Return the (possibly partial) path even if the goal was not reached.
    return path if len(path) > 1 else None
232
 
233
  def visualize_path(start_x, start_y, goal_x, goal_y, algorithm):
234
  start = (int(start_x), int(start_y))