# File: rans-env/server/tasks/track_linear_velocity.py
# (added in commit 2f2f4a9 by dpang)
# Copyright (c) Space Robotics Lab, SnT, University of Luxembourg, SpaceR
# RANS: arXiv:2310.07393
"""
TrackLinearVelocity Task
========================
The spacecraft must maintain a randomly sampled target linear velocity (vx_t, vy_t).
Observation (6 values):
[Δvx, Δvy, cos(θ), sin(θ), vx, vy]
where Δv = v_target − v_current.
Reward:
r = exp(-‖v_error‖² / (2·σ_v²))
Episode terminates when ‖v_error‖ < tolerance.
"""
from __future__ import annotations
import math
from typing import Any, Dict, Tuple
import numpy as np
from .base import BaseTask
class TrackLinearVelocityTask(BaseTask):
    """Task: drive the spacecraft's world-frame linear velocity to a sampled target.

    On every reset a target velocity (vx_t, vy_t) is drawn with uniform random
    speed and heading; the agent is rewarded for shrinking the velocity error
    and the episode ends once the error drops below ``tolerance``.
    """

    # Default task parameters; entries in the user-supplied config override these.
    _DEFAULTS: Dict[str, Any] = {
        "tolerance": 0.05,         # success threshold (m/s)
        "reward_sigma": 0.50,      # velocity reward width
        "max_target_speed": 1.00,  # maximum sampled target speed (m/s)
    }

    def __init__(self, config: Dict[str, Any] | None = None) -> None:
        super().__init__(config)
        # Merge defaults with any caller-provided overrides.
        merged = dict(self._DEFAULTS)
        merged.update(config or {})
        self.tolerance: float = merged["tolerance"]
        self.reward_sigma: float = merged["reward_sigma"]
        self.max_target_speed: float = merged["max_target_speed"]
        # Current target velocity in the world frame; re-sampled on reset().
        self._target_vel = np.zeros(2, dtype=np.float64)

    # ------------------------------------------------------------------
    # BaseTask interface
    # ------------------------------------------------------------------
    def reset(self, spacecraft_state: np.ndarray) -> Dict[str, Any]:
        """Sample a fresh target velocity (uniform speed and heading) and report it."""
        magnitude = np.random.uniform(0.0, self.max_target_speed)
        heading = np.random.uniform(0.0, 2.0 * math.pi)
        vx_t = magnitude * math.cos(heading)
        vy_t = magnitude * math.sin(heading)
        self._target_vel = np.array([vx_t, vy_t])
        return {"target_linear_velocity": self._target_vel.tolist()}

    def get_observation(self, spacecraft_state: np.ndarray) -> np.ndarray:
        """Build the 6-value observation [Δvx, Δvy, cos(θ), sin(θ), vx, vy].

        ``spacecraft_state`` is unpacked as exactly six values; the heading θ
        and the two velocity components are the only ones used here.
        """
        _px, _py, theta, vx, vy, _omega = spacecraft_state
        obs = [
            self._target_vel[0] - vx,  # Δvx = vx_target − vx
            self._target_vel[1] - vy,  # Δvy = vy_target − vy
            math.cos(theta),
            math.sin(theta),
            vx,
            vy,
        ]
        return np.array(obs, dtype=np.float32)

    def compute_reward(
        self, spacecraft_state: np.ndarray
    ) -> Tuple[float, bool, Dict[str, Any]]:
        """Return (reward, done, info) for the current state.

        Reward is the exponential kernel of the velocity-error norm (see
        ``BaseTask._reward_exponential``); the episode is done when the error
        falls under ``tolerance``.
        """
        current = np.array([spacecraft_state[3], spacecraft_state[4]])
        vel_error = float(np.linalg.norm(self._target_vel - current))
        reward = self._reward_exponential(vel_error, self.reward_sigma)
        goal_reached = vel_error < self.tolerance
        return reward, goal_reached, {
            "velocity_error_ms": vel_error,
            "goal_reached": goal_reached,
            "target_linear_velocity": self._target_vel.tolist(),
        }

    @property
    def num_observations(self) -> int:
        """Size of the observation vector produced by ``get_observation``."""
        return 6