dpang commited on
Commit
bd4ceb2
·
verified ·
1 Parent(s): 9cb40fe

Add server/tasks/go_to_position.py

Browse files
Files changed (1) hide show
  1. server/tasks/go_to_position.py +92 -0
server/tasks/go_to_position.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Space Robotics Lab, SnT, University of Luxembourg, SpaceR
2
+ # RANS: arXiv:2310.07393
3
+
4
+ """
5
+ GoToPosition Task
6
+ =================
7
+ The spacecraft must reach a randomly sampled target position (x_t, y_t).
8
+
9
+ Observation (6 values):
10
+ [Δx_body, Δy_body, cos(θ), sin(θ), vx, vy]
11
+ where Δ values are relative-to-target in the spacecraft's body frame.
12
+
13
+ Reward (RANS paper, exponential mode):
14
+ r = exp(-‖p_error‖² / (2·σ_p²))
15
+
16
+ Episode terminates when ‖p_error‖ < tolerance OR step limit reached.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import math
22
+ from typing import Any, Dict, Tuple
23
+
24
+ import numpy as np
25
+
26
+ from .base import BaseTask
27
+
28
+
29
class GoToPositionTask(BaseTask):
    """Navigate the spacecraft to a randomly sampled 2-D target position.

    Observation (6 values):
        [dx_body, dy_body, cos(theta), sin(theta), vx, vy]
    where the delta values are the target position relative to the
    spacecraft, rotated into the body frame.

    Reward (exponential mode, per the RANS paper):
        r = exp(-||p_error||^2 / (2 * sigma_p^2))

    The episode's success condition is ||p_error|| < tolerance; the step
    limit is enforced by the caller, not by this task.
    """

    # Recognized reward modes; anything else is a config error.
    _REWARD_MODES = ("exponential", "inverse")

    # Default hyper-parameters (each can be overridden via the config dict).
    _DEFAULTS: Dict[str, Any] = {
        "tolerance": 0.10,          # success threshold (m)
        "reward_sigma": 1.00,       # width of Gaussian reward
        "reward_mode": "exponential",  # "exponential" | "inverse"
        "spawn_min_radius": 0.50,   # minimum target distance from origin (m)
        "spawn_max_radius": 3.00,   # maximum target distance from origin (m)
    }

    def __init__(self, config: Dict[str, Any] | None = None) -> None:
        """Merge *config* over the defaults and validate the result.

        Raises:
            ValueError: if ``reward_mode`` is not a recognized mode, or if
                ``spawn_min_radius`` exceeds ``spawn_max_radius``.
        """
        super().__init__(config)
        cfg = {**self._DEFAULTS, **(config or {})}
        self.tolerance: float = cfg["tolerance"]
        self.reward_sigma: float = cfg["reward_sigma"]
        self.reward_mode: str = cfg["reward_mode"]
        self.spawn_min_radius: float = cfg["spawn_min_radius"]
        self.spawn_max_radius: float = cfg["spawn_max_radius"]

        # Fail fast on a typo'd mode: previously an unrecognized string
        # silently selected the "inverse" reward at compute time, which
        # would corrupt training without any visible error.
        if self.reward_mode not in self._REWARD_MODES:
            raise ValueError(
                f"reward_mode must be one of {self._REWARD_MODES}, "
                f"got {self.reward_mode!r}"
            )
        # np.random.uniform(lo, hi) with lo > hi silently samples from an
        # inverted range — reject the misconfiguration up front.
        if self.spawn_min_radius > self.spawn_max_radius:
            raise ValueError(
                f"spawn_min_radius ({self.spawn_min_radius}) must not exceed "
                f"spawn_max_radius ({self.spawn_max_radius})"
            )

        # Current target position in world coordinates (x, y).
        self._target = np.zeros(2, dtype=np.float64)

    # ------------------------------------------------------------------
    # BaseTask interface
    # ------------------------------------------------------------------

    def reset(self, spacecraft_state: np.ndarray) -> Dict[str, Any]:
        """Sample a new target uniformly in an annulus around the origin.

        The radius is drawn from [spawn_min_radius, spawn_max_radius] and
        the angle from [0, 2*pi). *spacecraft_state* is accepted for
        interface compatibility but not used when placing the target.

        Returns:
            Dict with the new ``target_position`` as a 2-element list.
        """
        # NOTE: uses the global NumPy RNG; seeding is the caller's concern.
        r = np.random.uniform(self.spawn_min_radius, self.spawn_max_radius)
        angle = np.random.uniform(0.0, 2.0 * math.pi)
        self._target = np.array([r * math.cos(angle), r * math.sin(angle)])
        return {"target_position": self._target.tolist()}

    def get_observation(self, spacecraft_state: np.ndarray) -> np.ndarray:
        """Build the 6-value observation vector (float32).

        Expects *spacecraft_state* as [x, y, theta, vx, vy, omega]; the
        last element (angular rate) is not part of this observation.
        """
        x, y, theta, vx, vy, _ = spacecraft_state
        dx, dy = self._target[0] - x, self._target[1] - y
        # Rotate the world-frame target offset into the body frame
        # (helper inherited from BaseTask).
        dx_b, dy_b = self._world_to_body(dx, dy, theta)
        return np.array(
            [dx_b, dy_b, math.cos(theta), math.sin(theta), vx, vy],
            dtype=np.float32,
        )

    def compute_reward(
        self, spacecraft_state: np.ndarray
    ) -> Tuple[float, bool, Dict[str, Any]]:
        """Compute the per-step reward and success flag.

        Returns:
            (reward, goal_reached, info) where ``info`` carries the scalar
            position error, the success flag, and the target position.
        """
        x, y = spacecraft_state[0], spacecraft_state[1]
        pos_error = float(np.linalg.norm(self._target - np.array([x, y])))

        # reward_mode was validated in __init__, so this two-way branch
        # is exhaustive.
        if self.reward_mode == "exponential":
            reward = self._reward_exponential(pos_error, self.reward_sigma)
        else:
            reward = self._reward_inverse(pos_error)

        goal_reached = pos_error < self.tolerance
        info = {
            "position_error_m": pos_error,
            "goal_reached": goal_reached,
            "target_position": self._target.tolist(),
        }
        return reward, goal_reached, info

    @property
    def num_observations(self) -> int:
        """Size of the observation vector returned by get_observation."""
        return 6