Spaces:

dpang
/

rans-env

Sleeping

App Files Files Community

dpang committed on Mar 7

Commit

9cb40fe

verified ·

1 Parent(s): 0adb9e2

Add server/tasks/base.py

Browse files

Files changed (1) hide show

server/tasks/base.py +110 -0

server/tasks/base.py ADDED Viewed

	@@ -0,0 +1,110 @@

+# Copyright (c) Space Robotics Lab, SnT, University of Luxembourg, SpaceR
+# RANS: Reinforcement Learning based Autonomous Navigation for Spacecrafts
+# arXiv:2310.07393 — El-Hariry, Richard, Olivares-Mendez
+#
+# OpenEnv-compatible implementation
+"""Base class for RANS spacecraft navigation tasks."""
+from __future__ import annotations
+import math
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Tuple
+import numpy as np
class BaseTask(ABC):
    """
    Abstract base class for all RANS spacecraft navigation tasks.

    Subclasses define:
      - The task-specific observation vector
      - The reward function (matching the RANS paper's formulations)
      - Target generation and episode reset logic

    The class also bundles a few stateless math helpers (reward shaping,
    angle wrapping, frame rotation) as static methods.
    """

    def __init__(self, config: Dict[str, Any] | None = None) -> None:
        """
        Store the task configuration.

        Args:
            config: Optional task settings. ``None`` (or any other falsy
                value, e.g. an empty dict) is replaced by a fresh empty dict.
        """
        self.config: Dict[str, Any] = config or {}
        # Initialised to None here; presumably populated by a subclass's
        # reset() -- confirm against the concrete tasks.
        self._target: Any = None

    # ------------------------------------------------------------------
    # Abstract interface
    # ------------------------------------------------------------------
    @abstractmethod
    def reset(self, spacecraft_state: np.ndarray) -> Dict[str, Any]:
        """
        Sample a new target and reset internal episode state.

        Args:
            spacecraft_state: Current state vector [x, y, θ, vx, vy, ω].

        Returns:
            Dictionary with task metadata (target values, etc.).
        """

    @abstractmethod
    def get_observation(self, spacecraft_state: np.ndarray) -> np.ndarray:
        """
        Compute the task-specific observation vector from the spacecraft state.

        Args:
            spacecraft_state: Current state [x, y, θ, vx, vy, ω].

        Returns:
            1-D float32 array of length ``num_observations``.
        """

    @abstractmethod
    def compute_reward(
        self, spacecraft_state: np.ndarray
    ) -> Tuple[float, bool, Dict[str, Any]]:
        """
        Compute reward, done flag, and diagnostic info.

        Args:
            spacecraft_state: Current state [x, y, θ, vx, vy, ω].

        Returns:
            (reward, done, info) tuple.
        """

    # ------------------------------------------------------------------
    # Common helpers
    # ------------------------------------------------------------------
    @property
    def num_observations(self) -> int:
        """Size of the task-specific state observation vector.

        The base implementation returns 0.
        """
        return 0

    @property
    def name(self) -> str:
        """Name of the concrete task class (``type(self).__name__``)."""
        return self.__class__.__name__

    # ------------------------------------------------------------------
    # Shared reward primitives  (from RANS paper Sec. IV-B)
    # ------------------------------------------------------------------
    @staticmethod
    def _reward_exponential(error: float, sigma: float) -> float:
        """
        Gaussian-shaped reward ``exp(-error² / (2·σ²))`` — tight peak near zero.

        Args:
            error: Signed or unsigned error; only its square is used.
            sigma: Width of the peak. The denominator ``2·σ²`` is floored at
                1e-9 so that ``sigma == 0`` does not divide by zero.

        Returns:
            Reward in [0, 1]; 1.0 at zero error.
        """
        # Use x * x rather than x ** 2: float ``**`` raises OverflowError for
        # very large magnitudes, whereas multiplication saturates to inf and
        # the reward then decays cleanly to 0.0.
        squared_error = error * error
        denominator = max(2.0 * (sigma * sigma), 1e-9)
        return math.exp(-squared_error / denominator)

    @staticmethod
    def _reward_inverse(error: float) -> float:
        """
        Inverse reward ``1 / (1 + |error|)`` — smooth monotone decay.

        Args:
            error: Error value. Its magnitude is used, so a signed error
                (e.g. a wrapped heading error) cannot drive the denominator
                to zero or produce a reward outside (0, 1].

        Returns:
            Reward in (0, 1]; 1.0 at zero error.
        """
        return 1.0 / (1.0 + abs(error))

    @staticmethod
    def _wrap_angle(angle: float) -> float:
        """
        Wrap an angle in radians to the half-open interval [−π, π).

        Note that +π itself maps to −π.
        """
        return (angle + math.pi) % (2.0 * math.pi) - math.pi

    @staticmethod
    def _world_to_body(dx: float, dy: float, theta: float) -> Tuple[float, float]:
        """
        Rotate a world-frame displacement into the body frame.

        Args:
            dx: World-frame x component of the displacement.
            dy: World-frame y component of the displacement.
            theta: Rotation angle in radians (the body heading).

        Returns:
            ``(c·dx + s·dy, −s·dx + c·dy)`` with ``c = cos(theta)`` and
            ``s = sin(theta)``, i.e. the displacement rotated by ``−theta``.
        """
        c, s = math.cos(theta), math.sin(theta)
        return c * dx + s * dy, -s * dx + c * dy