Spaces:

iteratehack
/

team222

Runtime error

File size: 7,232 Bytes

import numpy as np
import gymnasium as gym
from gymnasium import spaces

class Drone3DEnv(gym.Env):
    """Custom 3D Drone Environment with physics simulation - NO PyBullet dependencies"""
    metadata = {"render_modes": []}

    def __init__(self, render_mode=None):
        super().__init__()
        self.render_mode = render_mode
        
        # Constants
        self.BOUNDS = np.array([[-5, -5, 0], [5, 5, 10]])
        self.TARGET_BOUNDS = np.array([[-4, -4, 1], [4, 4, 9]])
        self.MAX_STEPS = 1000
        self.dt = 0.02  # 50Hz physics
        
        # Physics parameters
        self.mass = 0.027  # kg (Crazyflie)
        self.g = 9.81
        self.drag_coeff = 0.01
        
        # State: [x, y, z, vx, vy, vz, roll, pitch, yaw, wx, wy, wz]
        self.state = np.zeros(12, dtype=np.float32)
        
        # Action Space: 4 motor thrusts (-1 to 1)
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32)
        
        # Observation Space: relative position + velocities + orientation
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(12,), dtype=np.float32)
        
        self.wind_vector = np.zeros(3, dtype=np.float32)
        self.target_pos = np.zeros(3, dtype=np.float32)
        self.step_count = 0

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        
        # Reset state: hovering at origin
        self.state = np.zeros(12, dtype=np.float32)
        self.state[2] = 1.0  # z = 1m
        
        # Random target
        self.target_pos = np.random.uniform(self.TARGET_BOUNDS[0], self.TARGET_BOUNDS[1]).astype(np.float32)
        
        # Random wind
        self.wind_vector = np.random.uniform(-1, 1, 3).astype(np.float32)
        self.wind_vector[2] = 0  # No vertical wind
        
        self.step_count = 0
        return self._get_obs(), {}

    def step(self, action):
        self.step_count += 1
        
        # Update wind (random walk)
        self.wind_vector += np.random.normal(0, 0.1, 3).astype(np.float32)
        self.wind_vector = np.clip(self.wind_vector, -5, 5)
        self.wind_vector[2] = 0
        
        # Update moving target
        target_move = np.random.normal(0, 0.05, 3).astype(np.float32)
        self.target_pos += target_move
        self.target_pos = np.clip(self.target_pos, self.TARGET_BOUNDS[0], self.TARGET_BOUNDS[1])
        
        # Extract state
        pos = self.state[0:3]
        vel = self.state[3:6]
        orn = self.state[6:9]  # roll, pitch, yaw
        ang_vel = self.state[9:12]
        
        # Convert action to thrust (0 to 1)
        thrust = (action + 1.0) / 2.0
        thrust = np.clip(thrust, 0, 1)
        
        # Total thrust force (simplified)
        total_thrust = np.sum(thrust) * 0.15  # Max ~0.6N total
        
        # Forces in body frame
        # Simplified: thrust is vertical in body frame
        thrust_force_body = np.array([0, 0, total_thrust], dtype=np.float32)
        
        # Rotate to world frame (simplified rotation)
        c_r, s_r = np.cos(orn[0]), np.sin(orn[0])
        c_p, s_p = np.cos(orn[1]), np.sin(orn[1])
        
        # Simplified rotation matrix (roll-pitch only for thrust)
        thrust_force_world = np.array([
            -s_p * thrust_force_body[2],
            s_r * c_p * thrust_force_body[2],
            c_r * c_p * thrust_force_body[2]
        ], dtype=np.float32)
        
        # Gravity
        gravity_force = np.array([0, 0, -self.mass * self.g], dtype=np.float32)
        
        # Wind force
        wind_force = self.wind_vector * 0.01
        
        # Drag
        drag_force = -self.drag_coeff * vel
        
        # Total force
        total_force = thrust_force_world + gravity_force + wind_force + drag_force
        
        # Acceleration
        accel = total_force / self.mass
        
        # Update velocity and position (Euler integration)
        vel_new = vel + accel * self.dt
        pos_new = pos + vel_new * self.dt
        
        # Torques (simplified from thrust differential)
        # Roll torque from left-right thrust difference
        roll_torque = (thrust[1] - thrust[3]) * 0.01
        # Pitch torque from front-back thrust difference  
        pitch_torque = (thrust[0] - thrust[2]) * 0.01
        # Yaw torque (simplified)
        yaw_torque = 0.0
        
        # Angular acceleration (simplified inertia)
        ang_accel = np.array([roll_torque, pitch_torque, yaw_torque], dtype=np.float32) * 10.0
        
        # Update angular velocity and orientation
        ang_vel_new = ang_vel + ang_accel * self.dt
        ang_vel_new = np.clip(ang_vel_new, -10, 10)  # Limit angular velocity
        
        orn_new = orn + ang_vel_new * self.dt
        # Wrap angles
        orn_new[0] = np.arctan2(np.sin(orn_new[0]), np.cos(orn_new[0]))  # roll
        orn_new[1] = np.arctan2(np.sin(orn_new[1]), np.cos(orn_new[1]))  # pitch
        orn_new[2] = np.arctan2(np.sin(orn_new[2]), np.cos(orn_new[2]))  # yaw
        
        # Update state
        self.state[0:3] = pos_new
        self.state[3:6] = vel_new
        self.state[6:9] = orn_new
        self.state[9:12] = ang_vel_new
        
        # Compute reward
        reward = self._compute_reward(pos_new, orn_new, vel_new, ang_vel_new)
        
        # Check termination
        terminated = False
        
        # Out of bounds
        if (pos_new < self.BOUNDS[0]).any() or (pos_new > self.BOUNDS[1]).any():
            terminated = True
            reward -= 100.0
        
        # Crash (ground)
        if pos_new[2] < 0.1:
            terminated = True
            reward -= 100.0
        
        # Target reached
        dist = np.linalg.norm(pos_new - self.target_pos)
        if dist < 0.5:
            terminated = True
            reward += 100.0
        
        truncated = self.step_count >= self.MAX_STEPS
        
        return self._get_obs(), reward, terminated, truncated, {}

    def _get_obs(self):
        pos = self.state[0:3]
        vel = self.state[3:6]
        orn = self.state[6:9]
        ang_vel = self.state[9:12]
        
        # Relative position to target
        rel_pos = self.target_pos - pos
        
        # Observation: [rel_x, rel_y, rel_z, roll, pitch, yaw, vx, vy, vz, wx, wy, wz]
        obs = np.concatenate([rel_pos, orn, vel, ang_vel]).astype(np.float32)
        return obs

    def _compute_reward(self, pos, orn, vel, ang_vel):
        # Stability
        r_smooth = -0.1 * np.linalg.norm(ang_vel)
        r_stability = -np.linalg.norm(orn[0:2])
        
        # Target distance
        dist = np.linalg.norm(pos - self.target_pos)
        r_target = -dist * 0.5
        
        # Boundary safety
        d_min = np.min([
            pos[0] - self.BOUNDS[0][0],
            self.BOUNDS[1][0] - pos[0],
            pos[1] - self.BOUNDS[0][1],
            self.BOUNDS[1][1] - pos[1],
            pos[2] - self.BOUNDS[0][2],
            self.BOUNDS[1][2] - pos[2]
        ])
        
        r_boundary = 0.0
        if d_min < 1.0:
            r_boundary = -np.exp(1.0 - d_min)
        
        return r_stability + r_smooth + r_target + r_boundary

    def close(self):
        pass