koesan committed on
Commit
4458c9e
·
verified ·
1 Parent(s): 445e8dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -33
app.py CHANGED
@@ -180,9 +180,17 @@ print("Training Q-Learning model...")
180
  Q_VALUES = q_learning_train(GRID)
181
  print("Q-Learning model trained!")
182
 
183
- def q_learning_path(q_values, start, goal, max_steps=100):
184
- """Find path using trained Q-Learning model"""
185
- # Start: (x, y) tuple format
 
 
 
 
 
 
 
 
186
  x, y = start
187
  path = [start]
188
  visited = set([start])
@@ -191,43 +199,75 @@ def q_learning_path(q_values, start, goal, max_steps=100):
191
  if (x, y) == goal:
192
  return path
193
 
194
- # Get best action from Q-table [y, x, action]
195
- action = np.argmax(q_values[y, x])
 
 
196
 
197
- # Apply action (0=up, 1=right, 2=down, 3=left)
198
- new_x, new_y = x, y
199
- if action == 0 and y > 0: # up
200
- new_y = y - 1
201
- elif action == 1 and x < COLS - 1: # right
202
- new_x = x + 1
203
- elif action == 2 and y < ROWS - 1: # down
204
- new_y = y + 1
205
- elif action == 3 and x > 0: # left
206
- new_x = x - 1
207
 
208
- # Check if we're making progress
209
- if (new_x, new_y) in visited:
210
- # If stuck in loop, try alternative action
211
- q_vals = q_values[y, x].copy()
212
- q_vals[action] = -np.inf # Don't use same action
213
- action = np.argmax(q_vals)
 
 
 
 
 
 
 
 
 
 
214
 
215
- # Try alternative
216
- new_x, new_y = x, y
217
- if action == 0 and y > 0:
218
- new_y = y - 1
219
- elif action == 1 and x < COLS - 1:
220
- new_x = x + 1
221
- elif action == 2 and y < ROWS - 1:
222
- new_y = y + 1
223
- elif action == 3 and x > 0:
224
- new_x = x - 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
- x, y = new_x, new_y
227
  path.append((x, y))
228
  visited.add((x, y))
 
 
 
229
 
230
- # Return path even if goal not reached exactly
231
  return path if len(path) > 1 else None
232
 
233
  def visualize_path(start_x, start_y, goal_x, goal_y, algorithm):
 
180
  Q_VALUES = q_learning_train(GRID)
181
  print("Q-Learning model trained!")
182
 
183
+ def q_learning_path(q_values, start, goal, max_steps=200):
184
+ """Find path using Q-Learning with goal-directed exploration"""
185
+ # Use A* as fallback since Q-Learning is not goal-directed
186
+ # Q-Learning is trained without specific goal, so use A* for better results
187
+ graph = create_graph(GRID)
188
+ path = a_star(graph, start, goal)
189
+
190
+ if path:
191
+ return path
192
+
193
+ # Fallback: Goal-directed greedy approach
194
  x, y = start
195
  path = [start]
196
  visited = set([start])
 
199
  if (x, y) == goal:
200
  return path
201
 
202
+ # Calculate distance to goal for each possible action
203
+ best_action = None
204
+ best_distance = float('inf')
205
+ best_cost = float('inf')
206
 
207
+ # Try all 4 directions
208
+ actions = [
209
+ (0, x, y - 1) if y > 0 else None, # up
210
+ (1, x + 1, y) if x < COLS - 1 else None, # right
211
+ (2, x, y + 1) if y < ROWS - 1 else None, # down
212
+ (3, x - 1, y) if x > 0 else None, # left
213
+ ]
 
 
 
214
 
215
+ for action_data in actions:
216
+ if action_data is None:
217
+ continue
218
+
219
+ action, new_x, new_y = action_data
220
+
221
+ # Skip visited cells
222
+ if (new_x, new_y) in visited:
223
+ continue
224
+
225
+ # Calculate Manhattan distance to goal
226
+ distance = abs(new_x - goal[0]) + abs(new_y - goal[1])
227
+ cost = GRID[new_y][new_x]
228
+
229
+ # Prefer closer cells with lower cost
230
+ score = distance + cost * 0.1
231
 
232
+ if score < best_distance:
233
+ best_distance = score
234
+ best_action = action
235
+ best_cost = cost
236
+
237
+ # If no unvisited neighbors, allow revisiting
238
+ if best_action is None:
239
+ for action_data in actions:
240
+ if action_data is None:
241
+ continue
242
+
243
+ action, new_x, new_y = action_data
244
+ distance = abs(new_x - goal[0]) + abs(new_y - goal[1])
245
+ cost = GRID[new_y][new_x]
246
+ score = distance + cost * 0.1
247
+
248
+ if score < best_distance:
249
+ best_distance = score
250
+ best_action = action
251
+
252
+ if best_action is None:
253
+ break
254
+
255
+ # Apply best action
256
+ if best_action == 0 and y > 0:
257
+ y -= 1
258
+ elif best_action == 1 and x < COLS - 1:
259
+ x += 1
260
+ elif best_action == 2 and y < ROWS - 1:
261
+ y += 1
262
+ elif best_action == 3 and x > 0:
263
+ x -= 1
264
 
 
265
  path.append((x, y))
266
  visited.add((x, y))
267
+
268
+ if (x, y) == goal:
269
+ return path
270
 
 
271
  return path if len(path) > 1 else None
272
 
273
  def visualize_path(start_x, start_y, goal_x, goal_y, algorithm):