| |
|
| | import gymnasium as gym |
| | from gymnasium import spaces |
| | import numpy as np |
| |
|
| | |
# Indices into the 5-element observation/state vector.
CMD_AVANCER = 0   # master command "move forward" (0.0 or 1.0)
CMD_TOURNER = 1   # master command "turn" (0.0 or 1.0)
ETAT_FAIM = 2     # internal hunger level, clipped to [0.0, 1.0]
ETAT_SOMMEIL = 3  # internal sleepiness level, clipped to [0.0, 1.0]
ETAT_HUMEUR = 4   # internal mood, clipped to [-1.0, 1.0]

# Discrete action identifiers (action_space = Discrete(4)).
ACTION_ARRETER = 0   # stop
ACTION_AVANCER = 1   # move forward
ACTION_TOURNE_G = 2  # turn left
ACTION_TOURNE_D = 3  # turn right
| |
|
| |
|
class MiRobotEnv(gym.Env):
    """Simulation environment for MiRobot, a robot puppy learning to react
    to its master's commands and to its own internal state.

    Observation: float32 vector of length 5,
        [CMD_AVANCER, CMD_TOURNER, ETAT_FAIM, ETAT_SOMMEIL, ETAT_HUMEUR],
        where commands/hunger/sleep lie in [0, 1] and mood in [-1, 1].
    Actions: Discrete(4) -- stop, move forward, turn left, turn right.
    """

    metadata = {"render_modes": ["human"], "render_fps": 30}

    def __init__(self):
        # Py3 zero-argument super (was the dated super(MiRobotEnv, self)).
        super().__init__()

        # Four discrete actions: ACTION_ARRETER .. ACTION_TOURNE_D.
        self.action_space = spaces.Discrete(4)

        # Bounds match the state layout: commands/hunger/sleep in [0, 1],
        # mood in [-1, 1].
        low = np.array([0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32)
        high = np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        # Set by reset(); stays None until the first reset.
        self.state = None

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Returns:
            (observation, info) per the Gymnasium API. The observation is a
            copy of the internal state.
        """
        super().reset(seed=seed)

        self.state = np.array(
            [
                0.0,  # no "forward" command yet
                0.0,  # no "turn" command yet
                self.np_random.uniform(low=0.0, high=0.2),   # mild hunger
                self.np_random.uniform(low=0.0, high=0.2),   # mild sleepiness
                self.np_random.uniform(low=-0.1, high=0.1),  # near-neutral mood
            ],
            dtype=np.float32,
        )

        # BUGFIX: return a copy. The original returned self.state itself,
        # which step() mutates in place -- silently changing observations
        # the caller may have stored.
        return self.state.copy(), {}

    def _update_internal_states(self):
        """Advance hunger/sleep one tick and adjust mood, with clipping."""
        self.state[ETAT_FAIM] = np.clip(self.state[ETAT_FAIM] + 0.005, 0.0, 1.0)
        self.state[ETAT_SOMMEIL] = np.clip(self.state[ETAT_SOMMEIL] + 0.003, 0.0, 1.0)

        # Mood decays while hungry or sleepy, otherwise slowly recovers.
        needy = self.state[ETAT_FAIM] > 0.5 or self.state[ETAT_SOMMEIL] > 0.5
        humeur_delta = -0.005 if needy else 0.001
        self.state[ETAT_HUMEUR] = np.clip(
            self.state[ETAT_HUMEUR] + humeur_delta, -1.0, 1.0
        )

    def _calculate_reward(self, action):
        """Reward obedience to the active commands; penalize unmet needs."""
        reward = 0.0

        # "Move forward" command: reward compliance, penalize stopping.
        if self.state[CMD_AVANCER] > 0.5:
            if action == ACTION_AVANCER:
                reward += 1.0
            elif action == ACTION_ARRETER:
                reward -= 0.5

        # "Turn" command: either direction counts as compliance.
        if self.state[CMD_TOURNER] > 0.5:
            if action in (ACTION_TOURNE_G, ACTION_TOURNE_D):
                reward += 1.0
            elif action == ACTION_AVANCER:
                reward -= 0.5

        # Needs cost reward; good mood grants a small bonus.
        reward -= self.state[ETAT_FAIM] * 0.1
        reward -= self.state[ETAT_SOMMEIL] * 0.1
        reward += self.state[ETAT_HUMEUR] * 0.1

        return reward

    def _simulate_user_command(self):
        """With 20% probability, clear both commands and maybe issue one."""
        if self.np_random.random() < 0.2:
            self.state[CMD_AVANCER] = 0.0
            self.state[CMD_TOURNER] = 0.0

            # 0: no command, 1: forward, 2: turn (integers() upper bound is
            # exclusive, so this draws from {0, 1, 2}).
            choice = self.np_random.integers(0, 3)
            if choice == 1:
                self.state[CMD_AVANCER] = 1.0
            elif choice == 2:
                self.state[CMD_TOURNER] = 1.0

    def step(self, action):
        """Advance one timestep.

        Order matters: needs drift first, then the action is scored against
        the commands that were visible in the observation the agent acted
        on, and only then are new commands drawn for the next observation.

        Returns:
            (observation, reward, terminated, truncated, info). The episode
            never ends on its own; wrap with a TimeLimit for finite episodes.
        """
        self._update_internal_states()
        reward = self._calculate_reward(action)
        self._simulate_user_command()

        # BUGFIX: copy the observation (same aliasing issue as in reset()).
        return self.state.copy(), reward, False, False, {}

    def render(self, mode='human'):
        """Print a human-readable summary of the current state (French UI)."""
        # BUGFIX: the original raised TypeError when render() was called
        # before reset(); bail out quietly instead.
        if self.state is None:
            return

        faim = self.state[ETAT_FAIM] * 100
        sommeil = self.state[ETAT_SOMMEIL] * 100
        humeur = self.state[ETAT_HUMEUR]
        cmd_a = "OUI" if self.state[CMD_AVANCER] > 0.5 else "NON"
        cmd_t = "OUI" if self.state[CMD_TOURNER] > 0.5 else "NON"

        # Output strings deliberately kept byte-identical to the original.
        print("--- État MiRobot ---")
        print(f" > Commande Avancer: {cmd_a}, Commande Tourner: {cmd_t}")
        print(f" > Faim: {faim:.0f}%, Sommeil: {sommeil:.0f}%")
        print(f" > Humeur: {humeur:.2f} (entre -1.0 et 1.0)")

    def close(self):
        """Nothing to release."""
        pass
| |
|