team222 / env_3d.py
ylop's picture
FIX: Replace PyBullet with custom NumPy physics - TRAINING WILL START NOW
595200b verified
import numpy as np
import gymnasium as gym
from gymnasium import spaces
class Drone3DEnv(gym.Env):
"""Custom 3D Drone Environment with physics simulation - NO PyBullet dependencies"""
metadata = {"render_modes": []}
def __init__(self, render_mode=None):
super().__init__()
self.render_mode = render_mode
# Constants
self.BOUNDS = np.array([[-5, -5, 0], [5, 5, 10]])
self.TARGET_BOUNDS = np.array([[-4, -4, 1], [4, 4, 9]])
self.MAX_STEPS = 1000
self.dt = 0.02 # 50Hz physics
# Physics parameters
self.mass = 0.027 # kg (Crazyflie)
self.g = 9.81
self.drag_coeff = 0.01
# State: [x, y, z, vx, vy, vz, roll, pitch, yaw, wx, wy, wz]
self.state = np.zeros(12, dtype=np.float32)
# Action Space: 4 motor thrusts (-1 to 1)
self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32)
# Observation Space: relative position + velocities + orientation
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(12,), dtype=np.float32)
self.wind_vector = np.zeros(3, dtype=np.float32)
self.target_pos = np.zeros(3, dtype=np.float32)
self.step_count = 0
def reset(self, seed=None, options=None):
super().reset(seed=seed)
# Reset state: hovering at origin
self.state = np.zeros(12, dtype=np.float32)
self.state[2] = 1.0 # z = 1m
# Random target
self.target_pos = np.random.uniform(self.TARGET_BOUNDS[0], self.TARGET_BOUNDS[1]).astype(np.float32)
# Random wind
self.wind_vector = np.random.uniform(-1, 1, 3).astype(np.float32)
self.wind_vector[2] = 0 # No vertical wind
self.step_count = 0
return self._get_obs(), {}
def step(self, action):
self.step_count += 1
# Update wind (random walk)
self.wind_vector += np.random.normal(0, 0.1, 3).astype(np.float32)
self.wind_vector = np.clip(self.wind_vector, -5, 5)
self.wind_vector[2] = 0
# Update moving target
target_move = np.random.normal(0, 0.05, 3).astype(np.float32)
self.target_pos += target_move
self.target_pos = np.clip(self.target_pos, self.TARGET_BOUNDS[0], self.TARGET_BOUNDS[1])
# Extract state
pos = self.state[0:3]
vel = self.state[3:6]
orn = self.state[6:9] # roll, pitch, yaw
ang_vel = self.state[9:12]
# Convert action to thrust (0 to 1)
thrust = (action + 1.0) / 2.0
thrust = np.clip(thrust, 0, 1)
# Total thrust force (simplified)
total_thrust = np.sum(thrust) * 0.15 # Max ~0.6N total
# Forces in body frame
# Simplified: thrust is vertical in body frame
thrust_force_body = np.array([0, 0, total_thrust], dtype=np.float32)
# Rotate to world frame (simplified rotation)
c_r, s_r = np.cos(orn[0]), np.sin(orn[0])
c_p, s_p = np.cos(orn[1]), np.sin(orn[1])
# Simplified rotation matrix (roll-pitch only for thrust)
thrust_force_world = np.array([
-s_p * thrust_force_body[2],
s_r * c_p * thrust_force_body[2],
c_r * c_p * thrust_force_body[2]
], dtype=np.float32)
# Gravity
gravity_force = np.array([0, 0, -self.mass * self.g], dtype=np.float32)
# Wind force
wind_force = self.wind_vector * 0.01
# Drag
drag_force = -self.drag_coeff * vel
# Total force
total_force = thrust_force_world + gravity_force + wind_force + drag_force
# Acceleration
accel = total_force / self.mass
# Update velocity and position (Euler integration)
vel_new = vel + accel * self.dt
pos_new = pos + vel_new * self.dt
# Torques (simplified from thrust differential)
# Roll torque from left-right thrust difference
roll_torque = (thrust[1] - thrust[3]) * 0.01
# Pitch torque from front-back thrust difference
pitch_torque = (thrust[0] - thrust[2]) * 0.01
# Yaw torque (simplified)
yaw_torque = 0.0
# Angular acceleration (simplified inertia)
ang_accel = np.array([roll_torque, pitch_torque, yaw_torque], dtype=np.float32) * 10.0
# Update angular velocity and orientation
ang_vel_new = ang_vel + ang_accel * self.dt
ang_vel_new = np.clip(ang_vel_new, -10, 10) # Limit angular velocity
orn_new = orn + ang_vel_new * self.dt
# Wrap angles
orn_new[0] = np.arctan2(np.sin(orn_new[0]), np.cos(orn_new[0])) # roll
orn_new[1] = np.arctan2(np.sin(orn_new[1]), np.cos(orn_new[1])) # pitch
orn_new[2] = np.arctan2(np.sin(orn_new[2]), np.cos(orn_new[2])) # yaw
# Update state
self.state[0:3] = pos_new
self.state[3:6] = vel_new
self.state[6:9] = orn_new
self.state[9:12] = ang_vel_new
# Compute reward
reward = self._compute_reward(pos_new, orn_new, vel_new, ang_vel_new)
# Check termination
terminated = False
# Out of bounds
if (pos_new < self.BOUNDS[0]).any() or (pos_new > self.BOUNDS[1]).any():
terminated = True
reward -= 100.0
# Crash (ground)
if pos_new[2] < 0.1:
terminated = True
reward -= 100.0
# Target reached
dist = np.linalg.norm(pos_new - self.target_pos)
if dist < 0.5:
terminated = True
reward += 100.0
truncated = self.step_count >= self.MAX_STEPS
return self._get_obs(), reward, terminated, truncated, {}
def _get_obs(self):
pos = self.state[0:3]
vel = self.state[3:6]
orn = self.state[6:9]
ang_vel = self.state[9:12]
# Relative position to target
rel_pos = self.target_pos - pos
# Observation: [rel_x, rel_y, rel_z, roll, pitch, yaw, vx, vy, vz, wx, wy, wz]
obs = np.concatenate([rel_pos, orn, vel, ang_vel]).astype(np.float32)
return obs
def _compute_reward(self, pos, orn, vel, ang_vel):
# Stability
r_smooth = -0.1 * np.linalg.norm(ang_vel)
r_stability = -np.linalg.norm(orn[0:2])
# Target distance
dist = np.linalg.norm(pos - self.target_pos)
r_target = -dist * 0.5
# Boundary safety
d_min = np.min([
pos[0] - self.BOUNDS[0][0],
self.BOUNDS[1][0] - pos[0],
pos[1] - self.BOUNDS[0][1],
self.BOUNDS[1][1] - pos[1],
pos[2] - self.BOUNDS[0][2],
self.BOUNDS[1][2] - pos[2]
])
r_boundary = 0.0
if d_min < 1.0:
r_boundary = -np.exp(1.0 - d_min)
return r_stability + r_smooth + r_target + r_boundary
def close(self):
pass