Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import gymnasium as gym | |
| from gymnasium import spaces | |
| class Drone3DEnv(gym.Env): | |
| """Custom 3D Drone Environment with physics simulation - NO PyBullet dependencies""" | |
| metadata = {"render_modes": []} | |
| def __init__(self, render_mode=None): | |
| super().__init__() | |
| self.render_mode = render_mode | |
| # Constants | |
| self.BOUNDS = np.array([[-5, -5, 0], [5, 5, 10]]) | |
| self.TARGET_BOUNDS = np.array([[-4, -4, 1], [4, 4, 9]]) | |
| self.MAX_STEPS = 1000 | |
| self.dt = 0.02 # 50Hz physics | |
| # Physics parameters | |
| self.mass = 0.027 # kg (Crazyflie) | |
| self.g = 9.81 | |
| self.drag_coeff = 0.01 | |
| # State: [x, y, z, vx, vy, vz, roll, pitch, yaw, wx, wy, wz] | |
| self.state = np.zeros(12, dtype=np.float32) | |
| # Action Space: 4 motor thrusts (-1 to 1) | |
| self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32) | |
| # Observation Space: relative position + velocities + orientation | |
| self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(12,), dtype=np.float32) | |
| self.wind_vector = np.zeros(3, dtype=np.float32) | |
| self.target_pos = np.zeros(3, dtype=np.float32) | |
| self.step_count = 0 | |
| def reset(self, seed=None, options=None): | |
| super().reset(seed=seed) | |
| # Reset state: hovering at origin | |
| self.state = np.zeros(12, dtype=np.float32) | |
| self.state[2] = 1.0 # z = 1m | |
| # Random target | |
| self.target_pos = np.random.uniform(self.TARGET_BOUNDS[0], self.TARGET_BOUNDS[1]).astype(np.float32) | |
| # Random wind | |
| self.wind_vector = np.random.uniform(-1, 1, 3).astype(np.float32) | |
| self.wind_vector[2] = 0 # No vertical wind | |
| self.step_count = 0 | |
| return self._get_obs(), {} | |
| def step(self, action): | |
| self.step_count += 1 | |
| # Update wind (random walk) | |
| self.wind_vector += np.random.normal(0, 0.1, 3).astype(np.float32) | |
| self.wind_vector = np.clip(self.wind_vector, -5, 5) | |
| self.wind_vector[2] = 0 | |
| # Update moving target | |
| target_move = np.random.normal(0, 0.05, 3).astype(np.float32) | |
| self.target_pos += target_move | |
| self.target_pos = np.clip(self.target_pos, self.TARGET_BOUNDS[0], self.TARGET_BOUNDS[1]) | |
| # Extract state | |
| pos = self.state[0:3] | |
| vel = self.state[3:6] | |
| orn = self.state[6:9] # roll, pitch, yaw | |
| ang_vel = self.state[9:12] | |
| # Convert action to thrust (0 to 1) | |
| thrust = (action + 1.0) / 2.0 | |
| thrust = np.clip(thrust, 0, 1) | |
| # Total thrust force (simplified) | |
| total_thrust = np.sum(thrust) * 0.15 # Max ~0.6N total | |
| # Forces in body frame | |
| # Simplified: thrust is vertical in body frame | |
| thrust_force_body = np.array([0, 0, total_thrust], dtype=np.float32) | |
| # Rotate to world frame (simplified rotation) | |
| c_r, s_r = np.cos(orn[0]), np.sin(orn[0]) | |
| c_p, s_p = np.cos(orn[1]), np.sin(orn[1]) | |
| # Simplified rotation matrix (roll-pitch only for thrust) | |
| thrust_force_world = np.array([ | |
| -s_p * thrust_force_body[2], | |
| s_r * c_p * thrust_force_body[2], | |
| c_r * c_p * thrust_force_body[2] | |
| ], dtype=np.float32) | |
| # Gravity | |
| gravity_force = np.array([0, 0, -self.mass * self.g], dtype=np.float32) | |
| # Wind force | |
| wind_force = self.wind_vector * 0.01 | |
| # Drag | |
| drag_force = -self.drag_coeff * vel | |
| # Total force | |
| total_force = thrust_force_world + gravity_force + wind_force + drag_force | |
| # Acceleration | |
| accel = total_force / self.mass | |
| # Update velocity and position (Euler integration) | |
| vel_new = vel + accel * self.dt | |
| pos_new = pos + vel_new * self.dt | |
| # Torques (simplified from thrust differential) | |
| # Roll torque from left-right thrust difference | |
| roll_torque = (thrust[1] - thrust[3]) * 0.01 | |
| # Pitch torque from front-back thrust difference | |
| pitch_torque = (thrust[0] - thrust[2]) * 0.01 | |
| # Yaw torque (simplified) | |
| yaw_torque = 0.0 | |
| # Angular acceleration (simplified inertia) | |
| ang_accel = np.array([roll_torque, pitch_torque, yaw_torque], dtype=np.float32) * 10.0 | |
| # Update angular velocity and orientation | |
| ang_vel_new = ang_vel + ang_accel * self.dt | |
| ang_vel_new = np.clip(ang_vel_new, -10, 10) # Limit angular velocity | |
| orn_new = orn + ang_vel_new * self.dt | |
| # Wrap angles | |
| orn_new[0] = np.arctan2(np.sin(orn_new[0]), np.cos(orn_new[0])) # roll | |
| orn_new[1] = np.arctan2(np.sin(orn_new[1]), np.cos(orn_new[1])) # pitch | |
| orn_new[2] = np.arctan2(np.sin(orn_new[2]), np.cos(orn_new[2])) # yaw | |
| # Update state | |
| self.state[0:3] = pos_new | |
| self.state[3:6] = vel_new | |
| self.state[6:9] = orn_new | |
| self.state[9:12] = ang_vel_new | |
| # Compute reward | |
| reward = self._compute_reward(pos_new, orn_new, vel_new, ang_vel_new) | |
| # Check termination | |
| terminated = False | |
| # Out of bounds | |
| if (pos_new < self.BOUNDS[0]).any() or (pos_new > self.BOUNDS[1]).any(): | |
| terminated = True | |
| reward -= 100.0 | |
| # Crash (ground) | |
| if pos_new[2] < 0.1: | |
| terminated = True | |
| reward -= 100.0 | |
| # Target reached | |
| dist = np.linalg.norm(pos_new - self.target_pos) | |
| if dist < 0.5: | |
| terminated = True | |
| reward += 100.0 | |
| truncated = self.step_count >= self.MAX_STEPS | |
| return self._get_obs(), reward, terminated, truncated, {} | |
| def _get_obs(self): | |
| pos = self.state[0:3] | |
| vel = self.state[3:6] | |
| orn = self.state[6:9] | |
| ang_vel = self.state[9:12] | |
| # Relative position to target | |
| rel_pos = self.target_pos - pos | |
| # Observation: [rel_x, rel_y, rel_z, roll, pitch, yaw, vx, vy, vz, wx, wy, wz] | |
| obs = np.concatenate([rel_pos, orn, vel, ang_vel]).astype(np.float32) | |
| return obs | |
| def _compute_reward(self, pos, orn, vel, ang_vel): | |
| # Stability | |
| r_smooth = -0.1 * np.linalg.norm(ang_vel) | |
| r_stability = -np.linalg.norm(orn[0:2]) | |
| # Target distance | |
| dist = np.linalg.norm(pos - self.target_pos) | |
| r_target = -dist * 0.5 | |
| # Boundary safety | |
| d_min = np.min([ | |
| pos[0] - self.BOUNDS[0][0], | |
| self.BOUNDS[1][0] - pos[0], | |
| pos[1] - self.BOUNDS[0][1], | |
| self.BOUNDS[1][1] - pos[1], | |
| pos[2] - self.BOUNDS[0][2], | |
| self.BOUNDS[1][2] - pos[2] | |
| ]) | |
| r_boundary = 0.0 | |
| if d_min < 1.0: | |
| r_boundary = -np.exp(1.0 - d_min) | |
| return r_stability + r_smooth + r_target + r_boundary | |
| def close(self): | |
| pass | |