Update app.py
Browse files
app.py
CHANGED
|
@@ -38,7 +38,7 @@ class Ant:
|
|
| 38 |
self.path_home = []
|
| 39 |
self.role = "explorer"
|
| 40 |
self.communication_range = 10
|
| 41 |
-
self.q_table =
|
| 42 |
|
| 43 |
def perceive_environment(self, pheromone_grid, ants):
|
| 44 |
self.food_pheromone = pheromone_grid[self.position[0], self.position[1], 0]
|
|
@@ -48,13 +48,13 @@ class Ant:
|
|
| 48 |
# Perceive nearby ants
|
| 49 |
self.nearby_ants = [ant for ant in ants if distance.euclidean(self.position, ant.position) <= self.communication_range]
|
| 50 |
|
| 51 |
-
|
| 52 |
possible_actions = self.get_possible_actions()
|
| 53 |
|
| 54 |
if random.random() < self.genome['exploration_rate']:
|
| 55 |
action = random.choice(possible_actions)
|
| 56 |
else:
|
| 57 |
-
q_values = [self.
|
| 58 |
action = possible_actions[np.argmax(q_values)]
|
| 59 |
|
| 60 |
reward = self.calculate_reward()
|
|
@@ -62,24 +62,26 @@ class Ant:
|
|
| 62 |
|
| 63 |
return action
|
| 64 |
|
| 65 |
-
def
|
| 66 |
-
|
| 67 |
-
return 10
|
| 68 |
-
elif self.position in FOOD_SOURCES:
|
| 69 |
-
return 20
|
| 70 |
-
elif self.position in OBSTACLES:
|
| 71 |
-
return -10
|
| 72 |
-
else:
|
| 73 |
-
return -1 + self.food_pheromone - self.danger_pheromone + 0.5 * self.exploration_pheromone
|
| 74 |
|
| 75 |
def update_q_table(self, action, reward):
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
)
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
def get_possible_actions(self):
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
def update(self, pheromone_grid, ants):
|
| 85 |
self.perceive_environment(pheromone_grid, ants)
|
|
|
|
| 38 |
self.path_home = []
|
| 39 |
self.role = "explorer"
|
| 40 |
self.communication_range = 10
|
| 41 |
+
self.q_table = {} # Changed to dictionary for flexible indexing
|
| 42 |
|
| 43 |
def perceive_environment(self, pheromone_grid, ants):
|
| 44 |
self.food_pheromone = pheromone_grid[self.position[0], self.position[1], 0]
|
|
|
|
| 48 |
# Perceive nearby ants
|
| 49 |
self.nearby_ants = [ant for ant in ants if distance.euclidean(self.position, ant.position) <= self.communication_range]
|
| 50 |
|
| 51 |
+
def act(self, pheromone_grid):
|
| 52 |
possible_actions = self.get_possible_actions()
|
| 53 |
|
| 54 |
if random.random() < self.genome['exploration_rate']:
|
| 55 |
action = random.choice(possible_actions)
|
| 56 |
else:
|
| 57 |
+
q_values = [self.get_q_value(action) for action in possible_actions]
|
| 58 |
action = possible_actions[np.argmax(q_values)]
|
| 59 |
|
| 60 |
reward = self.calculate_reward()
|
|
|
|
| 62 |
|
| 63 |
return action
|
| 64 |
|
| 65 |
def get_q_value(self, action):
    """Look up the learned Q-value for taking *action* from the current cell.

    The Q-table is keyed by (position, action); pairs never visited default
    to 0 so untried moves start out neutral.
    """
    key = (self.position, action)
    if key in self.q_table:
        return self.q_table[key]
    return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
def update_q_table(self, action, reward):
    """Apply one tabular Q-learning update for (current position, action).

    new_q = (1 - lr) * current_q + lr * (reward + gamma * max_future_q)

    where lr and gamma come from self.genome['learning_rate'] and
    self.genome['discount_factor'].
    """
    current_q = self.get_q_value(action)

    # max() over an empty sequence raises ValueError; an ant boxed in with no
    # possible actions should simply see a future estimate of 0 instead.
    max_future_q = max(
        (self.get_q_value(future_action) for future_action in self.get_possible_actions()),
        default=0,
    )

    new_q = (1 - self.genome['learning_rate']) * current_q + \
        self.genome['learning_rate'] * (reward + self.genome['discount_factor'] * max_future_q)

    self.q_table[(self.position, action)] = new_q
|
| 76 |
|
| 77 |
def get_possible_actions(self):
    """Return the neighboring cells the ant may move into.

    Considers the four cardinal neighbors (right, down, left, up) of the
    current position and keeps those that lie inside the GRID_SIZE x
    GRID_SIZE grid and are not listed in OBSTACLES.
    """
    x, y = self.position
    steps = ((0, 1), (1, 0), (0, -1), (-1, 0))  # right, down, left, up
    return [
        (x + dx, y + dy)
        for dx, dy in steps
        if 0 <= x + dx < GRID_SIZE
        and 0 <= y + dy < GRID_SIZE
        and (x + dx, y + dy) not in OBSTACLES
    ]
|
| 85 |
|
| 86 |
def update(self, pheromone_grid, ants):
|
| 87 |
self.perceive_environment(pheromone_grid, ants)
|