# =============================================================================
# OpenEnv Specification — Planetary Rover Navigation Simulator
# Meta PyTorch Hackathon — Round 1
# =============================================================================

# Package identity for this environment specification.
name: 'planetary-rover-navigation'
version: '1.0.0'
# Folded scalar: the lines below are joined with single spaces when parsed.
description: >
  A planetary surface navigation simulator in which a rover agent
  must traverse unknown terrain, manage battery reserves, avoid
  obstacles, and reach a sequence of target waypoints.
author: 'Hackathon Team'
license: 'MIT'

# ---------------------------------------------------------------------------
# Environment metadata
# ---------------------------------------------------------------------------
env:
  # Hard episode cap before truncation.
  max_steps: 500
  # Simulated seconds per step.
  step_dt: 1.0
  render_modes:
    - 'none'
    - 'ascii'
    - 'rgb_array'
  # Right-hand frame, Z is up.
  coordinate_system: 'cartesian'
  # Unit labels for the physical quantities referenced in this spec.
  units:
    distance: 'meters'
    angle: 'radians'
    # NOTE(review): watt-hours is a unit of energy rather than power — confirm the intended label.
    power: 'watt-hours'
    velocity: 'meters_per_second'

# ---------------------------------------------------------------------------
# Tasks
# ---------------------------------------------------------------------------
tasks:
  # Task 1: obstacle-free transit to one waypoint.
  - id: 'easy'
    display_name: 'Flat Plains Transit'
    description: >
      Navigate flat, obstacle-free terrain to a single stationary
      waypoint. Battery drain is minimal. Graded purely on arrival
      accuracy and step efficiency.
    difficulty: 1
    max_steps: 200
    waypoints: 1
    terrain_profile: 'flat'
    obstacle_density: 0.0
    # Annotated as "% per step".
    # NOTE(review): this value is larger than the medium (0.01) and hard (0.04)
    # rates even though the description calls the drain minimal — confirm
    # whether it is a percentage or a fraction and whether the number is right.
    battery_drain_rate: 0.05
    target_score: 1.0

  # Task 2: one waypoint behind a crater-rim obstacle ring.
  - id: 'medium'
    display_name: 'Crater Avoidance'
    description: >
      A static crater-rim obstacle ring bisects the direct path to
      the waypoint. Two perpendicular gaps allow passage on either
      side. Collisions subtract 0.06 from the score
      (capped at -0.40).
    difficulty: 2
    max_steps: 300
    waypoints: 1
    terrain_profile: 'flat'
    # Zero because the crater ring is placed deterministically, not randomly.
    obstacle_density: 0.0
    # Full-thrust drain × 1.0 multiplier.
    battery_drain_rate: 0.01
    target_score: 1.0

  # Task 3: one waypoint on a reduced battery budget.
  - id: 'hard'
    display_name: 'Battery Sprint'
    description: >
      The rover starts with only 35% battery charge and drain is
      multiplied ×4. Any detour exhausts power before arrival. Compute
      the direct vector to the waypoint and commit to a straight-line
      full-thrust burn.
    difficulty: 3
    max_steps: 100
    waypoints: 1
    terrain_profile: 'flat'
    obstacle_density: 0.0
    # Full-thrust drain × 4.0 multiplier.
    battery_drain_rate: 0.04
    target_score: 1.0

# ---------------------------------------------------------------------------
# Observation Space
# ---------------------------------------------------------------------------
# Dict observation space: one entry per sensor field, each declaring its own
# space type, shape, dtype and bounds.
observation_space:
  type: "dict"
  description: >
    Full sensor readout returned by reset(), state(), and the 'obs'
    field of step(). All float values are normalised to [-1, 1] or
    [0, 1] unless noted as raw.

  fields:

    # --- Rover pose ---
    rover_position:
      type: "Box"
      shape: [3]
      dtype: "float32"
      low: [-500.0, -500.0, -50.0]
      high: [500.0, 500.0, 50.0]
      description: "[x, y, z] absolute position of rover centroid in meters (raw)"

    rover_heading:
      type: "Box"
      shape: [1]
      dtype: "float32"
      # Bounds use the float32 value nearest to pi. The previous 3.14159 is
      # smaller than pi, so a heading of exactly +/-pi fell outside the box.
      low: [-3.1415927]
      high: [3.1415927]
      description: "Yaw angle in radians relative to +X axis (raw)"

    rover_velocity:
      type: "Box"
      shape: [3]
      dtype: "float32"
      low: [-5.0, -5.0, -2.0]
      high: [5.0, 5.0, 2.0]
      description: "[vx, vy, vz] velocity vector in m/s (raw)"

    # --- Target waypoint ---
    target_position:
      type: "Box"
      shape: [3]
      dtype: "float32"
      low: [-500.0, -500.0, -50.0]
      high: [500.0, 500.0, 50.0]
      description: "[x, y, z] absolute position of the current active waypoint (raw)"

    # Bounds are twice the position bounds: the largest possible difference
    # between two points inside the position box.
    target_relative:
      type: "Box"
      shape: [3]
      dtype: "float32"
      low: [-1000.0, -1000.0, -100.0]
      high: [1000.0, 1000.0, 100.0]
      description: >
        [dx, dy, dz] vector from rover to active waypoint (raw meters).
        Use this for goal-conditioned policies.

    target_distance:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      # Rounded up from sqrt(1000^2 + 1000^2 + 100^2) ~= 1417.74 m, the full
      # diagonal of the target_relative bounds. The previous 1414.0 was below
      # even the planar diagonal sqrt(2) * 1000 ~= 1414.21 m.
      high: [1418.0]
      description: "Euclidean distance to active waypoint in meters (raw)"

    waypoints_remaining:
      type: "Discrete"
      n: 4                        # 0–3 (0 = episode complete)
      dtype: "int32"
      description: "Number of waypoints not yet visited in current episode"

    # --- Obstacle data ---
    obstacle_map:
      type: "Box"
      shape: [8, 3]
      dtype: "float32"
      # Scalar bounds here, unlike the per-component lists used by other fields.
      low: -1.0
      high: 1.0
      description: >
        Closest 8 obstacles, each encoded as [dx_norm, dy_norm, dist_norm].
        dx/dy are normalised to [-1, 1] relative to sensor range (50 m).
        dist_norm is [0, 1] where 0 = contact, 1 = at max sensor range.
        Rows are sorted by ascending distance. Padded with [0, 0, 1] when
        fewer than 8 obstacles are within sensor range.

    obstacle_count:
      type: "Discrete"
      # NOTE(review): presumably capped at 8 to match the obstacle_map rows —
      # confirm the behaviour when more than 8 obstacles are in range.
      n: 9                        # 0–8 within sensor range
      dtype: "int32"
      description: "Number of distinct obstacles currently within sensor range"

    nearest_obstacle_distance:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [50.0]
      description: "Raw distance (meters) to the closest obstacle. 50.0 if none in range."

    # --- Battery ---
    battery_level:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [1.0]
      description: "Normalised remaining battery [0.0 = depleted, 1.0 = full]"

    battery_drain_rate:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [1.0]
      description: "Current drain rate as fraction of total capacity per step"

    # --- Terrain ---
    terrain_type:
      type: "Discrete"
      n: 4
      dtype: "int32"
      description: >
        Integer encoding of the terrain tile under the rover.
        0 = flat/sand, 1 = rocky, 2 = crater_floor, 3 = crater_rim

    terrain_slope:
      type: "Box"
      shape: [2]
      dtype: "float32"
      low: [-1.0, -1.0]
      high: [1.0, 1.0]
      description: >
        [slope_x, slope_y] surface normal projection components, normalised.
        [0, 0] = level surface.

    # --- Episode meta ---
    steps_taken:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      # Matches the environment-wide cap of 500 steps.
      high: [500.0]
      description: "Number of steps elapsed in the current episode (raw)"

    steps_remaining_norm:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [1.0]
      description: "Normalised remaining steps: (max_steps - steps_taken) / max_steps"

# ---------------------------------------------------------------------------
# Action Space
# ---------------------------------------------------------------------------
# Dict action space: the motor commands accepted by step(action).
action_space:
  type: 'dict'
  description: >
    Motor commands sent to the rover each step via step(action). All
    continuous values are clamped by the server to their declared
    bounds.

  fields:
    # Continuous forward drive command.
    thrust:
      type: 'Box'
      shape: [1]
      dtype: 'float32'
      low: [0.0]
      high: [1.0]
      description: >
        Forward drive intensity [0.0 = stopped, 1.0 = full throttle].
        Negative values are not valid;
        use brake to decelerate.

    # Continuous left/right steering command.
    steering:
      type: 'Box'
      shape: [1]
      dtype: 'float32'
      low: [-1.0]
      high: [1.0]
      description: >
        Lateral steering command
        [-1.0 = hard left, 0.0 = straight, 1.0 = hard right].
        Interpreted as a yaw rate multiplied by current speed.

    # Two-valued brake switch.
    brake:
      type: 'Discrete'
      n: 2
      dtype: 'int32'
      description: >
        Binary brake flag. 1 = apply regenerative braking
        (reduces speed, recovers 20 % of kinetic energy into battery).
        0 = coast/drive.

    # Small continuous vertical assist, documented for crater terrain only.
    vertical_thruster:
      type: 'Box'
      shape: [1]
      dtype: 'float32'
      low: [-0.2]
      high: [0.2]
      description: >
        Small vertical adjustment thruster for crater terrain only
        [-0.2 = push down / anchor, 0.2 = assist over lip]. Has no
        effect and incurs no battery cost on flat/rocky terrain.

# ---------------------------------------------------------------------------
# Reward shaping (informational — enforced by /grader)
# ---------------------------------------------------------------------------
# Per-step reward specification: a prose summary plus a catalogue of the
# individual reward terms.
reward:
  # Summary of the step reward and its relation to the /grader episode score.
  description: >
    Step reward signal returned in the 'reward' field of step().
    The /grader endpoint computes the normalised episode score [0.0, 1.0]
    from the full trajectory.  Reward shaping uses potential-based and
    vector-field techniques to prevent the "stationary exploit".
  # Each component lists its value and the condition under which it applies.
  components:
    # Bonus granted when the rover is within 2.0 m of the active waypoint.
    waypoint_reached:
      value: +100.0
      condition: "target_distance < 2.0 meters"
      note: "Massive asymmetric reward prevents early policy collapse."
    # Constant cost charged on every step.
    step_penalty:
      value: -0.01
      condition: "every step"
    # Charged while the nearest obstacle is closer than 0.5 m.
    collision_penalty:
      value: -5.0
      condition: "nearest_obstacle_distance < 0.5 meters"
    # Charged when the battery level reaches zero.
    battery_depleted:
      value: -20.0
      condition: "battery_level == 0.0"
    # Progress term: positive when the rover gets closer, negative when it
    # moves away, scaled by the initial distance.
    potential_based_distance_shaping:
      value: "(prev_dist - curr_dist) / initial_distance"
      condition: "every step while waypoint is active"
      note: >
        Φ(s) = −distance.  Shaping = Φ(s') − Φ(s) = prev_dist − curr_dist.
        Normalised by initial_distance for spawn-distance independence.
        Standing still yields shaping = 0, so step penalty + drain = net negative.
    # Heading-alignment term, active only near obstacles; the value is given
    # as an upper bound rather than a fixed number.
    vector_field_obstacle_shaping:
      value: "up to +0.3"
      condition: "any obstacle within 10 metres"
      note: >
        Computes attractive (goal) + repulsive (obstacles) gradient blend,
        takes orthogonal tangent, rewards cosine similarity with rover heading.
        Scaled by proximity urgency (closer obstacle = stronger signal).
    # Bonus for finishing in under half of the step budget.
    efficiency_bonus:
      value: +5.0
      condition: "episode completed in < 50% of max_steps"

# ---------------------------------------------------------------------------
# Grading rubric (used by /grader endpoint)
# ---------------------------------------------------------------------------
# Per-task scoring rubric and the declared range of the final score.
grading:
  note: >
    Scoring is task-specific. The authoritative formula for each task
    is returned by the /tasks endpoint in the scoring_formula field,
    and enforced by the /grader endpoint.

  # Weighted blend of proximity and step efficiency.
  easy:
    formula: 'proximity*0.85 + step_efficiency*0.15'
    proximity:
      definition: '1.0 - (min_distance_achieved / initial_distance)'
      note: 'Exactly 0.70 when the rover closed 70% of the gap. 1.0 on arrival.'
    step_efficiency:
      definition: '1.0 - (steps_taken / max_steps)'

  # Same two terms, reweighted, minus a capped per-collision deduction.
  # NOTE(review): proximity and step_efficiency are only defined under
  # "easy"; presumably the same definitions apply here — confirm.
  # NOTE(review): with zero proximity and efficiency the formula as written
  # evaluates to -0.40, below the declared output floor of 0.0 — confirm that
  # /grader clamps the result.
  medium:
    formula: 'proximity*0.75 + step_efficiency*0.25 - min(collision_count*0.06, 0.40)'
    collision_penalty:
      per_collision: 0.06
      cap: 0.40

  # Proximity blended with the share of the starting charge that remains.
  hard:
    formula: 'proximity*0.65 + battery_efficiency*0.35'
    battery_efficiency:
      definition: 'battery_remaining / starting_battery'
      note: 'Normalised against 0.35 starting charge, not full capacity.'

  # Declared type and range of the final score.
  output:
    type: 'float32'
    low: 0.0
    high: 1.0