# Copyright (c) Space Robotics Lab, SnT, University of Luxembourg, SpaceR
# RANS: arXiv:2310.07393

"""
GoToPosition Task
=================
The spacecraft must reach a randomly sampled target position (x_t, y_t).

Observation (6 values):
    [Δx_body, Δy_body, cos(θ), sin(θ), vx, vy]
    where Δ values are relative-to-target in the spacecraft's body frame.

Reward (RANS paper, exponential mode):
    r = exp(-‖p_error‖² / (2·σ_p²))

Episode terminates when ‖p_error‖ < tolerance  OR  step limit reached.
"""

from __future__ import annotations

import math
from typing import Any, Dict, Tuple

import numpy as np

from .base import BaseTask


class GoToPositionTask(BaseTask):
    """Navigate spacecraft to a target 2-D position."""

    # Default hyper-parameters (can be overridden via config dict)
    _DEFAULTS: Dict[str, Any] = {
        "tolerance": 0.10,          # success threshold (m)
        "reward_sigma": 1.00,       # width of Gaussian reward
        "reward_mode": "exponential",  # "exponential" | "inverse"
        "spawn_min_radius": 0.50,   # minimum target distance from origin (m)
        "spawn_max_radius": 3.00,   # maximum target distance from origin (m)
    }

    def __init__(self, config: Dict[str, Any] | None = None) -> None:
        super().__init__(config)
        cfg = {**self._DEFAULTS, **(config or {})}
        self.tolerance: float = cfg["tolerance"]
        self.reward_sigma: float = cfg["reward_sigma"]
        self.reward_mode: str = cfg["reward_mode"]
        self.spawn_min_radius: float = cfg["spawn_min_radius"]
        self.spawn_max_radius: float = cfg["spawn_max_radius"]

        self._target = np.zeros(2, dtype=np.float64)

    # ------------------------------------------------------------------
    # BaseTask interface
    # ------------------------------------------------------------------

    def reset(self, spacecraft_state: np.ndarray) -> Dict[str, Any]:
        r = np.random.uniform(self.spawn_min_radius, self.spawn_max_radius)
        angle = np.random.uniform(0.0, 2.0 * math.pi)
        self._target = np.array([r * math.cos(angle), r * math.sin(angle)])
        return {"target_position": self._target.tolist()}

    def get_observation(self, spacecraft_state: np.ndarray) -> np.ndarray:
        x, y, theta, vx, vy, _ = spacecraft_state
        dx, dy = self._target[0] - x, self._target[1] - y
        dx_b, dy_b = self._world_to_body(dx, dy, theta)
        return np.array(
            [dx_b, dy_b, math.cos(theta), math.sin(theta), vx, vy],
            dtype=np.float32,
        )

    def compute_reward(
        self, spacecraft_state: np.ndarray
    ) -> Tuple[float, bool, Dict[str, Any]]:
        x, y = spacecraft_state[0], spacecraft_state[1]
        pos_error = float(np.linalg.norm(self._target - np.array([x, y])))

        if self.reward_mode == "exponential":
            reward = self._reward_exponential(pos_error, self.reward_sigma)
        else:
            reward = self._reward_inverse(pos_error)

        goal_reached = pos_error < self.tolerance
        info = {
            "position_error_m": pos_error,
            "goal_reached": goal_reached,
            "target_position": self._target.tolist(),
        }
        return reward, goal_reached, info

    @property
    def num_observations(self) -> int:
        return 6