# Copyright (c) Space Robotics Lab, SnT, University of Luxembourg, SpaceR
# RANS: Reinforcement Learning based Autonomous Navigation for Spacecrafts
# arXiv:2310.07393 — El-Hariry, Richard, Olivares-Mendez
#
# OpenEnv-compatible implementation

"""Base class for RANS spacecraft navigation tasks."""

from __future__ import annotations

import math
from abc import ABC, abstractmethod
from typing import Any, Dict, Tuple

import numpy as np


class BaseTask(ABC):
    """
    Abstract base class for all RANS spacecraft navigation tasks.

    Subclasses define:
      - The task-specific observation vector
      - The reward function (matching the RANS paper's formulations)
      - Target generation and episode reset logic

    The base class only stores the configuration dictionary and a target
    placeholder, and offers a few stateless reward/geometry helpers.
    """

    def __init__(self, config: Dict[str, Any] | None = None) -> None:
        """
        Store the task configuration and clear the current target.

        Args:
            config: Optional task settings. ``None`` (or any falsy value)
                results in a fresh empty dictionary.
        """
        self.config: Dict[str, Any] = config or {}
        # No target exists until a subclass assigns one.
        self._target: Any = None

    # ------------------------------------------------------------------
    # Abstract interface
    # ------------------------------------------------------------------

    @abstractmethod
    def reset(self, spacecraft_state: np.ndarray) -> Dict[str, Any]:
        """
        Sample a new target and reset internal episode state.

        Args:
            spacecraft_state: Current state vector [x, y, θ, vx, vy, ω].

        Returns:
            Dictionary with task metadata (target values, etc.).
        """

    @abstractmethod
    def get_observation(self, spacecraft_state: np.ndarray) -> np.ndarray:
        """
        Compute the task-specific observation vector from the spacecraft state.

        Args:
            spacecraft_state: Current state [x, y, θ, vx, vy, ω].

        Returns:
            1-D float32 array of length ``num_observations``.
        """

    @abstractmethod
    def compute_reward(
        self, spacecraft_state: np.ndarray
    ) -> Tuple[float, bool, Dict[str, Any]]:
        """
        Compute reward, done flag, and diagnostic info.

        Args:
            spacecraft_state: Current state [x, y, θ, vx, vy, ω].

        Returns:
            (reward, done, info) tuple.
        """

    # ------------------------------------------------------------------
    # Common helpers
    # ------------------------------------------------------------------

    @property
    def num_observations(self) -> int:
        """
        Size of the task-specific state observation vector.

        The base implementation returns 0; a subclass that produces a
        non-empty observation is expected to override this property.
        """
        return 0

    @property
    def name(self) -> str:
        """Name of the concrete task class."""
        return self.__class__.__name__

    # ------------------------------------------------------------------
    # Shared reward primitives  (from RANS paper Sec. IV-B)
    # ------------------------------------------------------------------

    @staticmethod
    def _reward_exponential(error: float, sigma: float) -> float:
        """
        exp(-error² / (2·σ²))  — tight peak near zero.

        Args:
            error: Error value; only its square is used, so the sign does
                not matter.
            sigma: Width of the peak. The denominator ``2·σ²`` is floored at
                ``1e-9`` so that ``sigma == 0`` cannot divide by zero.

        Returns:
            1.0 when ``error`` is zero, decaying towards 0.0 as ``|error|``
            grows.
        """
        try:
            squared_error = error**2
        except OverflowError:
            # Python floats raise instead of saturating when ``**``
            # overflows; an error that large has fully decayed the reward.
            return 0.0
        return math.exp(-squared_error / max(2.0 * sigma**2, 1e-9))

    @staticmethod
    def _reward_inverse(error: float) -> float:
        """
        1 / (1 + |error|)  — smooth monotone decay.

        Args:
            error: Error value. Its magnitude is used, so a signed error
                (e.g. a wrapped heading difference) cannot push the result
                outside (0, 1] or hit a division by zero at ``error == -1``.

        Returns:
            1.0 when ``error`` is zero, decreasing towards 0.0 as
            ``|error|`` grows.
        """
        return 1.0 / (1.0 + abs(error))

    @staticmethod
    def _wrap_angle(angle: float) -> float:
        """
        Wrap angle to [−π, π).

        The lower bound is inclusive: an input of exactly π (or any odd
        multiple of it, up to floating-point rounding) maps to −π.

        Args:
            angle: Angle in radians, any magnitude.

        Returns:
            Equivalent angle in radians within [−π, π).
        """
        return (angle + math.pi) % (2.0 * math.pi) - math.pi

    @staticmethod
    def _world_to_body(dx: float, dy: float, theta: float) -> Tuple[float, float]:
        """
        Rotate world-frame displacement into body frame.

        Applies a rotation by ``-theta`` to the vector ``(dx, dy)``.

        Args:
            dx: World-frame x component of the displacement.
            dy: World-frame y component of the displacement.
            theta: Rotation angle in radians.

        Returns:
            ``(bx, by)`` — the displacement components after rotation.
        """
        c, s = math.cos(theta), math.sin(theta)
        return c * dx + s * dy, -s * dx + c * dy