File size: 7,232 Bytes
28dbd6d
 
 
 
 
595200b
 
28dbd6d
 
 
 
 
 
595200b
28dbd6d
 
595200b
 
 
 
 
 
 
 
 
 
 
28dbd6d
 
595200b
28dbd6d
 
595200b
 
28dbd6d
 
 
 
595200b
 
 
 
 
 
 
 
 
 
 
 
28dbd6d
 
 
 
 
 
595200b
 
28dbd6d
595200b
28dbd6d
595200b
 
28dbd6d
 
 
595200b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28dbd6d
595200b
28dbd6d
595200b
28dbd6d
595200b
 
28dbd6d
595200b
28dbd6d
 
595200b
 
 
28dbd6d
 
 
595200b
28dbd6d
 
 
 
 
595200b
 
 
 
28dbd6d
 
 
 
595200b
 
 
28dbd6d
595200b
 
28dbd6d
595200b
28dbd6d
595200b
28dbd6d
595200b
28dbd6d
595200b
28dbd6d
 
 
 
 
 
 
 
 
 
 
595200b
28dbd6d
595200b
28dbd6d
 
595200b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import numpy as np
import gymnasium as gym
from gymnasium import spaces

class Drone3DEnv(gym.Env):
    """Custom 3D Drone Environment with physics simulation - NO PyBullet dependencies"""
    metadata = {"render_modes": []}

    def __init__(self, render_mode=None):
        super().__init__()
        self.render_mode = render_mode
        
        # Constants
        self.BOUNDS = np.array([[-5, -5, 0], [5, 5, 10]])
        self.TARGET_BOUNDS = np.array([[-4, -4, 1], [4, 4, 9]])
        self.MAX_STEPS = 1000
        self.dt = 0.02  # 50Hz physics
        
        # Physics parameters
        self.mass = 0.027  # kg (Crazyflie)
        self.g = 9.81
        self.drag_coeff = 0.01
        
        # State: [x, y, z, vx, vy, vz, roll, pitch, yaw, wx, wy, wz]
        self.state = np.zeros(12, dtype=np.float32)
        
        # Action Space: 4 motor thrusts (-1 to 1)
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32)
        
        # Observation Space: relative position + velocities + orientation
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(12,), dtype=np.float32)
        
        self.wind_vector = np.zeros(3, dtype=np.float32)
        self.target_pos = np.zeros(3, dtype=np.float32)
        self.step_count = 0

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        
        # Reset state: hovering at origin
        self.state = np.zeros(12, dtype=np.float32)
        self.state[2] = 1.0  # z = 1m
        
        # Random target
        self.target_pos = np.random.uniform(self.TARGET_BOUNDS[0], self.TARGET_BOUNDS[1]).astype(np.float32)
        
        # Random wind
        self.wind_vector = np.random.uniform(-1, 1, 3).astype(np.float32)
        self.wind_vector[2] = 0  # No vertical wind
        
        self.step_count = 0
        return self._get_obs(), {}

    def step(self, action):
        self.step_count += 1
        
        # Update wind (random walk)
        self.wind_vector += np.random.normal(0, 0.1, 3).astype(np.float32)
        self.wind_vector = np.clip(self.wind_vector, -5, 5)
        self.wind_vector[2] = 0
        
        # Update moving target
        target_move = np.random.normal(0, 0.05, 3).astype(np.float32)
        self.target_pos += target_move
        self.target_pos = np.clip(self.target_pos, self.TARGET_BOUNDS[0], self.TARGET_BOUNDS[1])
        
        # Extract state
        pos = self.state[0:3]
        vel = self.state[3:6]
        orn = self.state[6:9]  # roll, pitch, yaw
        ang_vel = self.state[9:12]
        
        # Convert action to thrust (0 to 1)
        thrust = (action + 1.0) / 2.0
        thrust = np.clip(thrust, 0, 1)
        
        # Total thrust force (simplified)
        total_thrust = np.sum(thrust) * 0.15  # Max ~0.6N total
        
        # Forces in body frame
        # Simplified: thrust is vertical in body frame
        thrust_force_body = np.array([0, 0, total_thrust], dtype=np.float32)
        
        # Rotate to world frame (simplified rotation)
        c_r, s_r = np.cos(orn[0]), np.sin(orn[0])
        c_p, s_p = np.cos(orn[1]), np.sin(orn[1])
        
        # Simplified rotation matrix (roll-pitch only for thrust)
        thrust_force_world = np.array([
            -s_p * thrust_force_body[2],
            s_r * c_p * thrust_force_body[2],
            c_r * c_p * thrust_force_body[2]
        ], dtype=np.float32)
        
        # Gravity
        gravity_force = np.array([0, 0, -self.mass * self.g], dtype=np.float32)
        
        # Wind force
        wind_force = self.wind_vector * 0.01
        
        # Drag
        drag_force = -self.drag_coeff * vel
        
        # Total force
        total_force = thrust_force_world + gravity_force + wind_force + drag_force
        
        # Acceleration
        accel = total_force / self.mass
        
        # Update velocity and position (Euler integration)
        vel_new = vel + accel * self.dt
        pos_new = pos + vel_new * self.dt
        
        # Torques (simplified from thrust differential)
        # Roll torque from left-right thrust difference
        roll_torque = (thrust[1] - thrust[3]) * 0.01
        # Pitch torque from front-back thrust difference  
        pitch_torque = (thrust[0] - thrust[2]) * 0.01
        # Yaw torque (simplified)
        yaw_torque = 0.0
        
        # Angular acceleration (simplified inertia)
        ang_accel = np.array([roll_torque, pitch_torque, yaw_torque], dtype=np.float32) * 10.0
        
        # Update angular velocity and orientation
        ang_vel_new = ang_vel + ang_accel * self.dt
        ang_vel_new = np.clip(ang_vel_new, -10, 10)  # Limit angular velocity
        
        orn_new = orn + ang_vel_new * self.dt
        # Wrap angles
        orn_new[0] = np.arctan2(np.sin(orn_new[0]), np.cos(orn_new[0]))  # roll
        orn_new[1] = np.arctan2(np.sin(orn_new[1]), np.cos(orn_new[1]))  # pitch
        orn_new[2] = np.arctan2(np.sin(orn_new[2]), np.cos(orn_new[2]))  # yaw
        
        # Update state
        self.state[0:3] = pos_new
        self.state[3:6] = vel_new
        self.state[6:9] = orn_new
        self.state[9:12] = ang_vel_new
        
        # Compute reward
        reward = self._compute_reward(pos_new, orn_new, vel_new, ang_vel_new)
        
        # Check termination
        terminated = False
        
        # Out of bounds
        if (pos_new < self.BOUNDS[0]).any() or (pos_new > self.BOUNDS[1]).any():
            terminated = True
            reward -= 100.0
        
        # Crash (ground)
        if pos_new[2] < 0.1:
            terminated = True
            reward -= 100.0
        
        # Target reached
        dist = np.linalg.norm(pos_new - self.target_pos)
        if dist < 0.5:
            terminated = True
            reward += 100.0
        
        truncated = self.step_count >= self.MAX_STEPS
        
        return self._get_obs(), reward, terminated, truncated, {}

    def _get_obs(self):
        pos = self.state[0:3]
        vel = self.state[3:6]
        orn = self.state[6:9]
        ang_vel = self.state[9:12]
        
        # Relative position to target
        rel_pos = self.target_pos - pos
        
        # Observation: [rel_x, rel_y, rel_z, roll, pitch, yaw, vx, vy, vz, wx, wy, wz]
        obs = np.concatenate([rel_pos, orn, vel, ang_vel]).astype(np.float32)
        return obs

    def _compute_reward(self, pos, orn, vel, ang_vel):
        # Stability
        r_smooth = -0.1 * np.linalg.norm(ang_vel)
        r_stability = -np.linalg.norm(orn[0:2])
        
        # Target distance
        dist = np.linalg.norm(pos - self.target_pos)
        r_target = -dist * 0.5
        
        # Boundary safety
        d_min = np.min([
            pos[0] - self.BOUNDS[0][0],
            self.BOUNDS[1][0] - pos[0],
            pos[1] - self.BOUNDS[0][1],
            self.BOUNDS[1][1] - pos[1],
            pos[2] - self.BOUNDS[0][2],
            self.BOUNDS[1][2] - pos[2]
        ])
        
        r_boundary = 0.0
        if d_min < 1.0:
            r_boundary = -np.exp(1.0 - d_min)
        
        return r_stability + r_smooth + r_target + r_boundary

    def close(self):
        pass