koesan committed
Commit 470ff46 · verified · 1 Parent(s): b2a4c30

Update app.py

Files changed (1): app.py (+100 −6)
app.py CHANGED
@@ -9,6 +9,7 @@ import io
 from PIL import Image
 import os
 import base64
+import copy
 
 os.environ['MPLCONFIGDIR'] = '/tmp/matplotlib'
 
@@ -123,22 +124,115 @@ def bellman_ford(graph, start, goal):
     path.reverse()
     return path if path and path[0] == start else None
 
+def q_learning_train(grid, episodes=1000):
+    """Train Q-Learning model on the grid"""
+    rows, cols = len(grid), len(grid[0])
+    q_values = np.zeros((rows, cols, 4))  # 4 actions: up, right, down, left
+
+    lr = 0.9
+    gamma = 0.9
+    epsilon = 0.9
+
+    def is_valid(state):
+        y, x = state
+        return 0 <= x < cols and 0 <= y < rows
+
+    for episode in range(episodes):
+        # Random start position
+        state = [np.random.randint(rows), np.random.randint(cols)]
+
+        for _ in range(100):  # Max steps per episode
+            old_state = copy.copy(state)
+
+            # Epsilon-greedy action selection
+            if np.random.random() > epsilon:
+                action = np.random.randint(4)
+            else:
+                action = np.argmax(q_values[state[0], state[1]])
+
+            # Apply action (0=up, 1=right, 2=down, 3=left)
+            new_state = copy.copy(state)
+            if action == 0 and state[0] > 0:  # up
+                new_state[0] -= 1
+            elif action == 1 and state[1] < cols - 1:  # right
+                new_state[1] += 1
+            elif action == 2 and state[0] < rows - 1:  # down
+                new_state[0] += 1
+            elif action == 3 and state[1] > 0:  # left
+                new_state[1] -= 1
+
+            # Calculate reward (negative cost)
+            if is_valid(new_state):
+                reward = -grid[new_state[0]][new_state[1]]
+                state = new_state
+            else:
+                reward = -100  # Penalty for invalid move
+
+            # Q-Learning update
+            old_q = q_values[old_state[0], old_state[1], action]
+            td = reward + (gamma * np.max(q_values[state[0], state[1]])) - old_q
+            q_values[old_state[0], old_state[1], action] = old_q + (lr * td)
+
+    return q_values
+
+# Train Q-Learning model once at startup
+print("Training Q-Learning model...")
+Q_VALUES = q_learning_train(GRID)
+print("Q-Learning model trained!")
+
+def q_learning_path(q_values, start, goal, max_steps=100):
+    """Find path using trained Q-Learning model"""
+    current_state = list(start)  # [x, y] format from frontend
+    path = [tuple(current_state)]
+
+    for _ in range(max_steps):
+        # Convert to [y, x] for q_values indexing
+        y, x = current_state[1], current_state[0]
+
+        if tuple(current_state) == goal:
+            break
+
+        # Get best action
+        action = np.argmax(q_values[y, x])
+
+        # Apply action
+        if action == 0 and y > 0:  # up
+            current_state[1] -= 1
+        elif action == 1 and x < COLS - 1:  # right
+            current_state[0] += 1
+        elif action == 2 and y < ROWS - 1:  # down
+            current_state[1] += 1
+        elif action == 3 and x > 0:  # left
+            current_state[0] -= 1
+
+        path.append(tuple(current_state))
+
+        # Prevent infinite loops
+        if len(path) > max_steps:
+            break
+
+    return path if path[-1] == goal else None
+
 def visualize_path(start_x, start_y, goal_x, goal_y, algorithm):
     start = (int(start_x), int(start_y))
     goal = (int(goal_x), int(goal_y))
 
     if algorithm == "A*":
         path = a_star(GRAPH, start, goal)
-        color = '#00FF00'
+        color = '#0066FF'  # Blue
         title = "A* Algorithm"
     elif algorithm == "Dijkstra":
         path = dijkstra(GRAPH, start, goal)
-        color = '#FFA500'
+        color = '#0066FF'  # Blue
         title = "Dijkstra Algorithm"
-    else:
+    elif algorithm == "Bellman-Ford":
         path = bellman_ford(GRAPH, start, goal)
-        color = '#FF0000'
+        color = '#0066FF'  # Blue
         title = "Bellman-Ford Algorithm"
+    else:  # Q-Learning
+        path = q_learning_path(Q_VALUES, start, goal)
+        color = '#0066FF'  # Blue
+        title = "Q-Learning Algorithm"
 
     # Load the background image - absolute path
     import os
@@ -165,10 +259,10 @@ def visualize_path(start_x, start_y, goal_x, goal_y, algorithm):
 
     # Draw the path
     if path:
-        # Path line
+        # Path line (blue)
         path_x = [x + 0.5 for x, y in path]
         path_y = [ROWS - y - 0.5 for x, y in path]
-        ax.plot(path_x, path_y, color=color, linewidth=4, alpha=0.7, zorder=5)
+        ax.plot(path_x, path_y, color='#0066FF', linewidth=4, alpha=0.7, zorder=5)
 
     # Start and goal points
     for i, (x, y) in enumerate(path):
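
The core of this commit is standard tabular Q-learning: each (cell, action) pair gets a value, updated from the temporal-difference error reward + gamma * max Q(next) - Q(current). Note that with the committed branch `if np.random.random() > epsilon` and epsilon = 0.9, a random action is taken only about 10% of the time. Below is a minimal, self-contained sketch of the same update rule on a hypothetical 3x3 cost grid; the grid values and the names q, alpha, moves are illustrative, not from app.py.

import numpy as np

# Hypothetical 3x3 cost grid; app.py trains on its module-level GRID instead.
grid = np.array([[1, 1, 5],
                 [1, 9, 1],
                 [1, 1, 1]])
rows, cols = grid.shape
q = np.zeros((rows, cols, 4))               # one Q-value per (cell, action)
moves = [(-1, 0), (0, 1), (1, 0), (0, -1)]  # up, right, down, left
alpha, gamma = 0.9, 0.9                     # same lr/gamma as the commit

rng = np.random.default_rng(0)
for _ in range(2000):                       # episodes
    y, x = rng.integers(rows), rng.integers(cols)
    for _ in range(50):                     # steps per episode
        # ~10% exploration, matching the committed epsilon behavior
        a = rng.integers(4) if rng.random() < 0.1 else int(np.argmax(q[y, x]))
        ny, nx = y + moves[a][0], x + moves[a][1]
        if 0 <= ny < rows and 0 <= nx < cols:
            r = -int(grid[ny, nx])          # reward = negative step cost
        else:
            r, ny, nx = -100, y, x          # wall penalty, stay in place
        # TD update: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        q[y, x, a] += alpha * (r + gamma * q[ny, nx].max() - q[y, x, a])
        y, x = ny, nx

As in the committed code, there is no terminal reward at a goal cell: the learned greedy policy steers toward low-cost cells, and q_learning_path relies on its goal check during rollout to stop.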
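
q_learning_path then performs a purely greedy rollout over the trained table, following argmax actions until it reaches the goal or runs out of steps. A hypothetical call, assuming app.py's module-level Q_VALUES and the frontend's (x, y) ordering; the (0, 0) and (7, 7) coordinates are made up for illustration:

# Returns None when the greedy policy fails to reach the goal within
# max_steps, e.g. when it gets stuck oscillating between two cells.
path = q_learning_path(Q_VALUES, start=(0, 0), goal=(7, 7), max_steps=100)
if path is None:
    print("Q-Learning found no path")
else:
    print(" -> ".join(f"({x},{y})" for x, y in path))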