dpang commited on
Commit
7410974
·
verified ·
1 Parent(s): 2f2f4a9

Add server/tasks/track_linear_angular_velocity.py

Browse files
server/tasks/track_linear_angular_velocity.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Space Robotics Lab, SnT, University of Luxembourg, SpaceR
2
+ # RANS: arXiv:2310.07393
3
+
4
+ """
5
+ TrackLinearAngularVelocity Task
6
+ ================================
7
+ The spacecraft must simultaneously track a target linear velocity (vx_t, vy_t)
8
+ AND a target angular velocity (ω_t).
9
+
10
+ Observation (8 values):
11
+ [Δvx, Δvy, Δω, cos(θ), sin(θ), vx, vy, ω]
12
+
13
+ Reward:
14
+ r = w_l · exp(-‖Δv‖² / (2·σ_l²))
15
+ + w_a · exp(-|Δω|² / (2·σ_a²))
16
+
17
+ This matches the TrackLinearAngularVelocityTask in the RANS codebase
18
+ (omniisaacgymenvs/tasks/MFP2D/track_linear_angular_velocity.py).
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import math
24
+ from typing import Any, Dict, Tuple
25
+
26
+ import numpy as np
27
+
28
+ from .base import BaseTask
29
+
30
+
31
+ class TrackLinearAngularVelocityTask(BaseTask):
32
+ """Track target linear AND angular velocity simultaneously."""
33
+
34
+ _DEFAULTS: Dict[str, Any] = {
35
+ "tolerance_linear": 0.05, # (m/s)
36
+ "tolerance_angular": 0.10, # (rad/s)
37
+ "reward_sigma_linear": 0.50,
38
+ "reward_sigma_angular": 0.50,
39
+ "linear_weight": 0.70, # w_l
40
+ "angular_weight": 0.30, # w_a
41
+ "max_target_speed": 1.00, # (m/s)
42
+ "max_target_angular_speed": 1.00, # (rad/s)
43
+ }
44
+
45
+ def __init__(self, config: Dict[str, Any] | None = None) -> None:
46
+ super().__init__(config)
47
+ cfg = {**self._DEFAULTS, **(config or {})}
48
+ self.tolerance_linear: float = cfg["tolerance_linear"]
49
+ self.tolerance_angular: float = cfg["tolerance_angular"]
50
+ self.reward_sigma_linear: float = cfg["reward_sigma_linear"]
51
+ self.reward_sigma_angular: float = cfg["reward_sigma_angular"]
52
+ self.linear_weight: float = cfg["linear_weight"]
53
+ self.angular_weight: float = cfg["angular_weight"]
54
+ self.max_target_speed: float = cfg["max_target_speed"]
55
+ self.max_target_angular_speed: float = cfg["max_target_angular_speed"]
56
+
57
+ self._target_linear_vel = np.zeros(2, dtype=np.float64)
58
+ self._target_angular_vel: float = 0.0
59
+
60
+ # ------------------------------------------------------------------
61
+ # BaseTask interface
62
+ # ------------------------------------------------------------------
63
+
64
+ def reset(self, spacecraft_state: np.ndarray) -> Dict[str, Any]:
65
+ speed = np.random.uniform(0.0, self.max_target_speed)
66
+ direction = np.random.uniform(0.0, 2.0 * math.pi)
67
+ self._target_linear_vel = np.array(
68
+ [speed * math.cos(direction), speed * math.sin(direction)]
69
+ )
70
+ self._target_angular_vel = np.random.uniform(
71
+ -self.max_target_angular_speed, self.max_target_angular_speed
72
+ )
73
+ return {
74
+ "target_linear_velocity": self._target_linear_vel.tolist(),
75
+ "target_angular_velocity": self._target_angular_vel,
76
+ }
77
+
78
+ def get_observation(self, spacecraft_state: np.ndarray) -> np.ndarray:
79
+ _, _, theta, vx, vy, omega = spacecraft_state
80
+ dvx = self._target_linear_vel[0] - vx
81
+ dvy = self._target_linear_vel[1] - vy
82
+ domega = self._target_angular_vel - omega
83
+ return np.array(
84
+ [dvx, dvy, domega, math.cos(theta), math.sin(theta), vx, vy, omega],
85
+ dtype=np.float32,
86
+ )
87
+
88
+ def compute_reward(
89
+ self, spacecraft_state: np.ndarray
90
+ ) -> Tuple[float, bool, Dict[str, Any]]:
91
+ vx, vy, omega = spacecraft_state[3], spacecraft_state[4], spacecraft_state[5]
92
+ linear_error = float(
93
+ np.linalg.norm(self._target_linear_vel - np.array([vx, vy]))
94
+ )
95
+ angular_error = abs(self._target_angular_vel - omega)
96
+
97
+ r_linear = self._reward_exponential(linear_error, self.reward_sigma_linear)
98
+ r_angular = self._reward_exponential(angular_error, self.reward_sigma_angular)
99
+ reward = self.linear_weight * r_linear + self.angular_weight * r_angular
100
+
101
+ goal_reached = (
102
+ linear_error < self.tolerance_linear
103
+ and angular_error < self.tolerance_angular
104
+ )
105
+ info = {
106
+ "linear_velocity_error_ms": linear_error,
107
+ "angular_velocity_error_rads": angular_error,
108
+ "goal_reached": goal_reached,
109
+ }
110
+ return reward, goal_reached, info
111
+
112
+ @property
113
+ def num_observations(self) -> int:
114
+ return 8