# File: rans-env/server/tasks/track_linear_velocity.py
# (added in commit 2f2f4a9 by dpang)
# Copyright (c) Space Robotics Lab, SnT, University of Luxembourg, SpaceR
# RANS: arXiv:2310.07393
"""
TrackLinearVelocity Task
========================
The spacecraft must maintain a randomly sampled target linear velocity (vx_t, vy_t).
Observation (6 values):
[Δvx, Δvy, cos(θ), sin(θ), vx, vy]
where Δv = v_target − v_current.
Reward:
r = exp(-‖v_error‖² / (2·σ_v²))
Episode terminates when ‖v_error‖ < tolerance.
"""
from __future__ import annotations
import math
from typing import Any, Dict, Tuple
import numpy as np
from .base import BaseTask
class TrackLinearVelocityTask(BaseTask):
    """Task: drive the spacecraft's world-frame linear velocity to a sampled target.

    On every reset a target velocity (vx_t, vy_t) is drawn with uniform random
    speed and heading; the agent is rewarded for shrinking the velocity error
    and the episode ends once the error drops below ``tolerance``.
    """

    # Default task parameters; entries in the user-supplied config override these.
    _DEFAULTS: Dict[str, Any] = {
        "tolerance": 0.05,         # success threshold (m/s)
        "reward_sigma": 0.50,      # velocity reward width
        "max_target_speed": 1.00,  # maximum sampled target speed (m/s)
    }

    def __init__(self, config: Dict[str, Any] | None = None) -> None:
        super().__init__(config)
        # Merge defaults with any caller-provided overrides.
        merged = dict(self._DEFAULTS)
        merged.update(config or {})
        self.tolerance: float = merged["tolerance"]
        self.reward_sigma: float = merged["reward_sigma"]
        self.max_target_speed: float = merged["max_target_speed"]
        # Current target velocity in the world frame; re-sampled on reset().
        self._target_vel = np.zeros(2, dtype=np.float64)

    # ------------------------------------------------------------------
    # BaseTask interface
    # ------------------------------------------------------------------
    def reset(self, spacecraft_state: np.ndarray) -> Dict[str, Any]:
        """Sample a fresh target velocity (uniform speed and heading) and report it."""
        magnitude = np.random.uniform(0.0, self.max_target_speed)
        heading = np.random.uniform(0.0, 2.0 * math.pi)
        vx_t = magnitude * math.cos(heading)
        vy_t = magnitude * math.sin(heading)
        self._target_vel = np.array([vx_t, vy_t])
        return {"target_linear_velocity": self._target_vel.tolist()}

    def get_observation(self, spacecraft_state: np.ndarray) -> np.ndarray:
        """Build the 6-value observation [Δvx, Δvy, cos(θ), sin(θ), vx, vy].

        ``spacecraft_state`` is unpacked as exactly six values; the heading θ
        and the two velocity components are the only ones used here.
        """
        _px, _py, theta, vx, vy, _omega = spacecraft_state
        obs = [
            self._target_vel[0] - vx,  # Δvx = vx_target − vx
            self._target_vel[1] - vy,  # Δvy = vy_target − vy
            math.cos(theta),
            math.sin(theta),
            vx,
            vy,
        ]
        return np.array(obs, dtype=np.float32)

    def compute_reward(
        self, spacecraft_state: np.ndarray
    ) -> Tuple[float, bool, Dict[str, Any]]:
        """Return (reward, done, info) for the current state.

        Reward is the exponential kernel of the velocity-error norm (see
        ``BaseTask._reward_exponential``); the episode is done when the error
        falls under ``tolerance``.
        """
        current = np.array([spacecraft_state[3], spacecraft_state[4]])
        vel_error = float(np.linalg.norm(self._target_vel - current))
        reward = self._reward_exponential(vel_error, self.reward_sigma)
        goal_reached = vel_error < self.tolerance
        return reward, goal_reached, {
            "velocity_error_ms": vel_error,
            "goal_reached": goal_reached,
            "target_linear_velocity": self._target_vel.tolist(),
        }

    @property
    def num_observations(self) -> int:
        """Size of the observation vector produced by ``get_observation``."""
        return 6