| | |
| | |
| |
|
| | """ |
| | TrackLinearAngularVelocity Task |
| | ================================ |
| | The spacecraft must simultaneously track a target linear velocity (vx_t, vy_t) |
| | AND a target angular velocity (ω_t). |
| | |
| | Observation (8 values): |
| | [Δvx, Δvy, Δω, cos(θ), sin(θ), vx, vy, ω] |
| | |
| | Reward: |
| | r = w_l · exp(-‖Δv‖² / (2·σ_l²)) |
| | + w_a · exp(-|Δω|² / (2·σ_a²)) |
| | |
| | This matches the TrackLinearAngularVelocityTask in the RANS codebase |
| | (omniisaacgymenvs/tasks/MFP2D/track_linear_angular_velocity.py). |
| | """ |
| |
|
| | from __future__ import annotations |
| |
|
| | import math |
| | from typing import Any, Dict, Tuple |
| |
|
| | import numpy as np |
| |
|
| | from .base import BaseTask |
| |
|
| |
|
| | class TrackLinearAngularVelocityTask(BaseTask): |
| | """Track target linear AND angular velocity simultaneously.""" |
| |
|
| | _DEFAULTS: Dict[str, Any] = { |
| | "tolerance_linear": 0.05, |
| | "tolerance_angular": 0.10, |
| | "reward_sigma_linear": 0.50, |
| | "reward_sigma_angular": 0.50, |
| | "linear_weight": 0.70, |
| | "angular_weight": 0.30, |
| | "max_target_speed": 1.00, |
| | "max_target_angular_speed": 1.00, |
| | } |
| |
|
| | def __init__(self, config: Dict[str, Any] | None = None) -> None: |
| | super().__init__(config) |
| | cfg = {**self._DEFAULTS, **(config or {})} |
| | self.tolerance_linear: float = cfg["tolerance_linear"] |
| | self.tolerance_angular: float = cfg["tolerance_angular"] |
| | self.reward_sigma_linear: float = cfg["reward_sigma_linear"] |
| | self.reward_sigma_angular: float = cfg["reward_sigma_angular"] |
| | self.linear_weight: float = cfg["linear_weight"] |
| | self.angular_weight: float = cfg["angular_weight"] |
| | self.max_target_speed: float = cfg["max_target_speed"] |
| | self.max_target_angular_speed: float = cfg["max_target_angular_speed"] |
| |
|
| | self._target_linear_vel = np.zeros(2, dtype=np.float64) |
| | self._target_angular_vel: float = 0.0 |
| |
|
| | |
| | |
| | |
| |
|
| | def reset(self, spacecraft_state: np.ndarray) -> Dict[str, Any]: |
| | speed = np.random.uniform(0.0, self.max_target_speed) |
| | direction = np.random.uniform(0.0, 2.0 * math.pi) |
| | self._target_linear_vel = np.array( |
| | [speed * math.cos(direction), speed * math.sin(direction)] |
| | ) |
| | self._target_angular_vel = np.random.uniform( |
| | -self.max_target_angular_speed, self.max_target_angular_speed |
| | ) |
| | return { |
| | "target_linear_velocity": self._target_linear_vel.tolist(), |
| | "target_angular_velocity": self._target_angular_vel, |
| | } |
| |
|
| | def get_observation(self, spacecraft_state: np.ndarray) -> np.ndarray: |
| | _, _, theta, vx, vy, omega = spacecraft_state |
| | dvx = self._target_linear_vel[0] - vx |
| | dvy = self._target_linear_vel[1] - vy |
| | domega = self._target_angular_vel - omega |
| | return np.array( |
| | [dvx, dvy, domega, math.cos(theta), math.sin(theta), vx, vy, omega], |
| | dtype=np.float32, |
| | ) |
| |
|
| | def compute_reward( |
| | self, spacecraft_state: np.ndarray |
| | ) -> Tuple[float, bool, Dict[str, Any]]: |
| | vx, vy, omega = spacecraft_state[3], spacecraft_state[4], spacecraft_state[5] |
| | linear_error = float( |
| | np.linalg.norm(self._target_linear_vel - np.array([vx, vy])) |
| | ) |
| | angular_error = abs(self._target_angular_vel - omega) |
| |
|
| | r_linear = self._reward_exponential(linear_error, self.reward_sigma_linear) |
| | r_angular = self._reward_exponential(angular_error, self.reward_sigma_angular) |
| | reward = self.linear_weight * r_linear + self.angular_weight * r_angular |
| |
|
| | goal_reached = ( |
| | linear_error < self.tolerance_linear |
| | and angular_error < self.tolerance_angular |
| | ) |
| | info = { |
| | "linear_velocity_error_ms": linear_error, |
| | "angular_velocity_error_rads": angular_error, |
| | "goal_reached": goal_reached, |
| | } |
| | return reward, goal_reached, info |
| |
|
| | @property |
| | def num_observations(self) -> int: |
| | return 8 |
| |
|