File size: 3,603 Bytes
5026d89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119

import gymnasium as gym
from gymnasium import spaces
import numpy as np

# Positions of each component inside the 5-element observation vector:
# two command flags (move forward, turn) followed by the three internal
# states (hunger, sleepiness, mood).
(
    CMD_AVANCER,
    CMD_TOURNER,
    ETAT_FAIM,
    ETAT_SOMMEIL,
    ETAT_HUMEUR,
) = range(5)

# Identifiers of the discrete actions: stop, move forward, turn left,
# turn right.
(
    ACTION_ARRETER,
    ACTION_AVANCER,
    ACTION_TOURNE_G,
    ACTION_TOURNE_D,
) = range(4)


class MiRobotEnv(gym.Env):
    """Simulation environment for MiRobot, a robot puppy that learns to react
    to its owner's commands and to its own internal state.

    Observation: a ``Box`` of five ``float32`` values, indexed by the
    module-level constants:

    * ``CMD_AVANCER``  -- 1.0 while a "move forward" command is active, else 0.0
    * ``CMD_TOURNER``  -- 1.0 while a "turn" command is active, else 0.0
    * ``ETAT_FAIM``    -- hunger, in [0, 1]
    * ``ETAT_SOMMEIL`` -- sleepiness, in [0, 1]
    * ``ETAT_HUMEUR``  -- mood, in [-1, 1]

    Actions: ``Discrete(4)`` -- ``ACTION_ARRETER``, ``ACTION_AVANCER``,
    ``ACTION_TOURNE_G``, ``ACTION_TOURNE_D``.

    ``step`` always reports ``terminated=False`` and ``truncated=False``, so
    an episode only ends if the caller stops it (for example with a
    time-limit wrapper).
    """
    metadata = {"render_modes": ["human"], "render_fps": 30}

    def __init__(self, render_mode=None):
        """Create the environment.

        Args:
            render_mode: ``None`` (default) or one of
                ``metadata["render_modes"]``. Stored on ``self.render_mode``.

        Raises:
            ValueError: if ``render_mode`` is not ``None`` and is not listed
                in ``metadata["render_modes"]``.
        """
        super().__init__()

        if render_mode is not None and render_mode not in self.metadata["render_modes"]:
            raise ValueError(
                f"Unsupported render_mode {render_mode!r}; "
                f"expected one of {self.metadata['render_modes']}"
            )
        self.render_mode = render_mode

        self.action_space = spaces.Discrete(4)

        # Bounds follow the observation layout: two command flags, hunger and
        # sleepiness in [0, 1], mood in [-1, 1].
        low = np.array([0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32)
        high = np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        # Populated by reset(); None until then.
        self.state = None

    def reset(self, seed=None, options=None):
        """Start a new episode.

        No command is active; hunger and sleepiness are drawn uniformly from
        [0, 0.2) and mood from [-0.1, 0.1).

        Args:
            seed: forwarded to ``gym.Env.reset`` to seed ``self.np_random``.
            options: accepted for API compatibility; not used.

        Returns:
            ``(observation, info)`` where ``observation`` is a copy of the
            internal state and ``info`` is an empty dict.
        """
        super().reset(seed=seed)

        self.state = np.array([
            0.0,
            0.0,
            self.np_random.uniform(low=0.0, high=0.2),
            self.np_random.uniform(low=0.0, high=0.2),
            self.np_random.uniform(low=-0.1, high=0.1),
        ], dtype=np.float32)

        # Return a copy: self.state is mutated in place by step(), and the
        # caller must not see earlier observations change under it.
        return self.state.copy(), {}

    def _update_internal_states(self):
        """Advance hunger, sleepiness and mood by one time step (in place)."""
        self.state[ETAT_FAIM] = np.clip(self.state[ETAT_FAIM] + 0.005, 0.0, 1.0)
        self.state[ETAT_SOMMEIL] = np.clip(self.state[ETAT_SOMMEIL] + 0.003, 0.0, 1.0)

        # Mood drops while the puppy is more than half hungry or half sleepy,
        # and slowly recovers otherwise.
        needs_unmet = self.state[ETAT_FAIM] > 0.5 or self.state[ETAT_SOMMEIL] > 0.5
        humeur_delta = -0.005 if needs_unmet else 0.001
        self.state[ETAT_HUMEUR] = np.clip(self.state[ETAT_HUMEUR] + humeur_delta, -1.0, 1.0)

    def _calculate_reward(self, action):
        """Score ``action`` against the active command and the internal state.

        Obeying the active command earns +1.0. Stopping during a "move
        forward" command, or moving forward during a "turn" command, costs
        0.5. Hunger and sleepiness each subtract 0.1 times their level, and
        mood adds 0.1 times its level.

        Args:
            action: one of the ``ACTION_*`` identifiers.

        Returns:
            The reward as a built-in ``float``.
        """
        reward = 0.0

        if self.state[CMD_AVANCER] > 0.5:
            if action == ACTION_AVANCER:
                reward += 1.0
            elif action == ACTION_ARRETER:
                reward -= 0.5

        if self.state[CMD_TOURNER] > 0.5:
            if action in (ACTION_TOURNE_G, ACTION_TOURNE_D):
                reward += 1.0
            elif action == ACTION_AVANCER:
                reward -= 0.5

        reward -= self.state[ETAT_FAIM] * 0.1
        reward -= self.state[ETAT_SOMMEIL] * 0.1
        reward += self.state[ETAT_HUMEUR] * 0.1

        # The state entries are NumPy scalars, so the running sum becomes one
        # too; hand back a plain Python float.
        return float(reward)

    def _simulate_user_command(self):
        """Randomly replace the active command (in place).

        With probability 0.2 both command flags are cleared and then, with
        equal chance, either no command, "move forward" or "turn" is set.
        Otherwise the current command is left unchanged.
        """
        if self.np_random.random() < 0.2:
            self.state[CMD_AVANCER] = 0.0
            self.state[CMD_TOURNER] = 0.0

            # 0 -> no command, 1 -> move forward, 2 -> turn.
            choice = self.np_random.integers(0, 3)

            if choice == 1:
                self.state[CMD_AVANCER] = 1.0
            elif choice == 2:
                self.state[CMD_TOURNER] = 1.0

    def step(self, action):
        """Apply ``action`` for one time step.

        The internal states are updated first, the reward is then computed
        against the command that was active when the action was chosen, and
        finally the command for the next step is drawn.

        Args:
            action: one of the ``ACTION_*`` identifiers.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``observation`` is a copy of the internal state, ``terminated``
            and ``truncated`` are always ``False`` and ``info`` is empty.
        """
        self._update_internal_states()
        reward = self._calculate_reward(action)
        self._simulate_user_command()

        terminated = False
        truncated = False
        info = {}

        # Copy so that later in-place updates do not alter observations the
        # caller has already stored.
        return self.state.copy(), reward, terminated, truncated, info

    def render(self, mode='human'):
        """Print a human-readable summary of the current state to stdout.

        Does nothing if the environment has not been reset yet.

        Args:
            mode: kept for backward compatibility with existing callers;
                not used.
        """
        if self.state is None:
            # Nothing to show before the first reset().
            return

        faim = self.state[ETAT_FAIM] * 100
        sommeil = self.state[ETAT_SOMMEIL] * 100
        humeur = self.state[ETAT_HUMEUR]
        cmd_a = "OUI" if self.state[CMD_AVANCER] > 0.5 else "NON"
        cmd_t = "OUI" if self.state[CMD_TOURNER] > 0.5 else "NON"

        print("--- État MiRobot ---")
        print(f"  > Commande Avancer: {cmd_a}, Commande Tourner: {cmd_t}")
        print(f"  > Faim: {faim:.0f}%, Sommeil: {sommeil:.0f}%")
        print(f"  > Humeur: {humeur:.2f} (entre -1.0 et 1.0)")

    def close(self):
        """Release resources; this environment holds none."""
        pass