# -----------------------------------------------------------------------------
# planetary-rover-navigation: environment manifest
#
# Identity and packaging metadata for the simulator. The remaining top-level
# sections describe the runtime settings, tasks, spaces, reward and grading.
# -----------------------------------------------------------------------------

name: planetary-rover-navigation
# Quoted so the version is always read as a string, never as a number.
version: "1.0.0"
description: >
  A planetary surface navigation simulator in which a rover agent must
  traverse unknown terrain, manage battery reserves, avoid obstacles,
  and reach a sequence of target waypoints.
author: "Hackathon Team"
license: "MIT"

# -----------------------------------------------------------------------------
# Global simulation settings
# -----------------------------------------------------------------------------
env:
  # Global step ceiling. Each entry under `tasks` declares its own max_steps
  # (100-300); presumably the per-task value takes precedence -- TODO confirm.
  max_steps: 500
  # Simulation time advanced per step; presumably seconds -- TODO confirm.
  step_dt: 1.0
  render_modes: ["none", "ascii", "rgb_array"]
  coordinate_system: "cartesian"
  # Units used by every "raw" (non-normalised) quantity in this manifest.
  units:
    distance: "meters"
    angle: "radians"
    power: "watt-hours"
    velocity: "meters_per_second"

# -----------------------------------------------------------------------------
# Task catalogue
#
# One entry per difficulty tier. Every task shares the same set of keys:
# id, display_name, description, difficulty, max_steps, waypoints,
# terrain_profile, obstacle_density, battery_drain_rate, target_score.
# -----------------------------------------------------------------------------
tasks:
  - id: "easy"
    display_name: "Flat Plains Transit"
    description: >
      Navigate flat, obstacle-free terrain to a single stationary waypoint.
      Battery drain is minimal. Graded purely on arrival accuracy and
      step efficiency.
    difficulty: 1
    max_steps: 200
    waypoints: 1
    terrain_profile: "flat"
    obstacle_density: 0.0
    # NOTE(review): 0.05 is the highest drain rate of the three tasks
    # (medium 0.01, hard 0.04), yet the description calls the drain
    # "minimal" -- confirm the intended value.
    battery_drain_rate: 0.05
    target_score: 1.0

  - id: "medium"
    display_name: "Crater Avoidance"
    description: >
      A static crater-rim obstacle ring bisects the direct path to the
      waypoint. Two perpendicular gaps allow passage on either side.
      Collisions subtract 0.06 from the score (capped at -0.40).
    difficulty: 2
    max_steps: 300
    waypoints: 1
    # NOTE(review): profile is "flat" and density is 0.0 although the
    # description places an obstacle ring on the path; presumably the ring is
    # placed explicitly rather than sampled from a density -- confirm.
    terrain_profile: "flat"
    obstacle_density: 0.0
    battery_drain_rate: 0.01
    target_score: 1.0

  - id: "hard"
    display_name: "Battery Sprint"
    description: >
      The rover starts with only 35% battery charge and drain is
      multiplied ×4. Any detour exhausts power before arrival.
      Compute the direct vector to the waypoint and commit to a
      straight-line full-thrust burn.
    difficulty: 3
    max_steps: 100
    waypoints: 1
    terrain_profile: "flat"
    obstacle_density: 0.0
    # 0.04 is four times the medium task's 0.01, which matches the "×4" in the
    # description if medium is the baseline -- TODO confirm.
    # NOTE(review): the 35% starting charge has no field in this entry.
    battery_drain_rate: 0.04
    target_score: 1.0

# -----------------------------------------------------------------------------
# Observation space
#
# Each entry under `fields` is either a "Box" (shape, dtype, low, high) or a
# "Discrete" (n, dtype) declaration plus a human-readable description.
# -----------------------------------------------------------------------------
observation_space:
  type: "dict"
  description: >
    Full sensor readout returned by reset(), state(), and the 'obs'
    field of step(). All float values are normalised to [-1, 1] or
    [0, 1] unless noted as raw.

  fields:

    # --- Rover kinematics ---------------------------------------------------
    rover_position:
      type: "Box"
      shape: [3]
      dtype: "float32"
      low: [-500.0, -500.0, -50.0]
      high: [500.0, 500.0, 50.0]
      description: "[x, y, z] absolute position of rover centroid in meters (raw)"

    rover_heading:
      type: "Box"
      shape: [1]
      dtype: "float32"
      # Bounds are +/- pi at float32 precision. The previous +/-3.14159 was
      # slightly smaller than pi, so a heading of exactly pi fell outside the
      # declared box.
      low: [-3.1415927]
      high: [3.1415927]
      description: "Yaw angle in radians relative to +X axis (raw)"

    rover_velocity:
      type: "Box"
      shape: [3]
      dtype: "float32"
      low: [-5.0, -5.0, -2.0]
      high: [5.0, 5.0, 2.0]
      description: "[vx, vy, vz] velocity vector in m/s (raw)"

    # --- Active waypoint ----------------------------------------------------
    target_position:
      type: "Box"
      shape: [3]
      dtype: "float32"
      low: [-500.0, -500.0, -50.0]
      high: [500.0, 500.0, 50.0]
      description: "[x, y, z] absolute position of the current active waypoint (raw)"

    target_relative:
      type: "Box"
      shape: [3]
      dtype: "float32"
      # Twice the position bounds: the widest possible rover-to-target offset.
      low: [-1000.0, -1000.0, -100.0]
      high: [1000.0, 1000.0, 100.0]
      description: >
        [dx, dy, dz] vector from rover to active waypoint (raw meters).
        Use this for goal-conditioned policies.

    target_distance:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      # Largest separation the position bounds allow:
      # sqrt(1000^2 + 1000^2 + 100^2) ~= 1417.7 m. The previous 1414.0 was
      # below even the planar maximum (1414.21 m).
      high: [1418.0]
      description: "Euclidean distance to active waypoint in meters (raw)"

    waypoints_remaining:
      type: "Discrete"
      n: 4
      dtype: "int32"
      description: "Number of waypoints not yet visited in current episode"

    # --- Obstacle sensing ---------------------------------------------------
    obstacle_map:
      type: "Box"
      shape: [8, 3]
      dtype: "float32"
      # Scalar bounds apply to every element of the 8x3 array.
      low: -1.0
      high: 1.0
      description: >
        Closest 8 obstacles, each encoded as [dx_norm, dy_norm, dist_norm].
        dx/dy are normalised to [-1, 1] relative to sensor range (50 m).
        dist_norm is [0, 1] where 0 = contact, 1 = at max sensor range.
        Rows are sorted by ascending distance. Padded with [0, 0, 1] when
        fewer than 8 obstacles are within sensor range.

    obstacle_count:
      type: "Discrete"
      # NOTE(review): n = 9 allows only 0..8, the number of rows in
      # obstacle_map; presumably the count saturates at 8 when more obstacles
      # are in range -- confirm.
      n: 9
      dtype: "int32"
      description: "Number of distinct obstacles currently within sensor range"

    nearest_obstacle_distance:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [50.0]
      description: "Raw distance (meters) to the closest obstacle. 50.0 if none in range."

    # --- Power --------------------------------------------------------------
    battery_level:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [1.0]
      description: "Normalised remaining battery [0.0 = depleted, 1.0 = full]"

    battery_drain_rate:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [1.0]
      description: "Current drain rate as fraction of total capacity per step"

    # --- Terrain ------------------------------------------------------------
    terrain_type:
      type: "Discrete"
      n: 4
      dtype: "int32"
      description: >
        Integer encoding of the terrain tile under the rover.
        0 = flat/sand, 1 = rocky, 2 = crater_floor, 3 = crater_rim

    terrain_slope:
      type: "Box"
      shape: [2]
      dtype: "float32"
      low: [-1.0, -1.0]
      high: [1.0, 1.0]
      description: >
        [slope_x, slope_y] surface normal projection components, normalised.
        [0, 0] = level surface.

    # --- Episode progress ---------------------------------------------------
    steps_taken:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      # Same value as the global env.max_steps ceiling (500).
      high: [500.0]
      description: "Number of steps elapsed in the current episode (raw)"

    steps_remaining_norm:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [1.0]
      description: "Normalised remaining steps: (max_steps - steps_taken) / max_steps"

# -----------------------------------------------------------------------------
# Action space
#
# Four controls per step: three continuous "Box" commands (thrust, steering,
# vertical_thruster) and one binary "Discrete" flag (brake).
# -----------------------------------------------------------------------------
action_space:
  type: "dict"
  description: >
    Motor commands sent to the rover each step via step(action).
    All continuous values are clamped by the server to their declared bounds.

  fields:

    thrust:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [1.0]
      description: >
        Forward drive intensity [0.0 = stopped, 1.0 = full throttle].
        Negative values are not valid; use brake to decelerate.

    steering:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [-1.0]
      high: [1.0]
      description: >
        Lateral steering command [-1.0 = hard left, 0.0 = straight, 1.0 = hard right].
        Interpreted as a yaw rate multiplied by current speed.

    brake:
      type: "Discrete"
      # n = 2 encodes the two flag values 0 and 1.
      n: 2
      dtype: "int32"
      description: >
        Binary brake flag. 1 = apply regenerative braking (reduces speed,
        recovers 20 % of kinetic energy into battery). 0 = coast/drive.

    vertical_thruster:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [-0.2]
      high: [0.2]
      description: >
        Small vertical adjustment thruster for crater terrain only
        [-0.2 = push down / anchor, 0.2 = assist over lip].
        Has no effect and incurs no battery cost on flat/rocky terrain.

# -----------------------------------------------------------------------------
# Per-step reward
#
# Each component lists its value (a number or a formula string), the condition
# under which it applies, and an optional explanatory note.
# -----------------------------------------------------------------------------
reward:
  description: >
    Step reward signal returned in the 'reward' field of step().
    The /grader endpoint computes the normalised episode score [0.0, 1.0]
    from the full trajectory. Reward shaping uses potential-based and
    vector-field techniques to prevent the "stationary exploit".
  components:
    # --- Sparse terminal and event terms ------------------------------------
    waypoint_reached:
      value: +100.0
      condition: "target_distance < 2.0 meters"
      note: "Massive asymmetric reward prevents early policy collapse."
    step_penalty:
      value: -0.01
      condition: "every step"
    collision_penalty:
      value: -5.0
      condition: "nearest_obstacle_distance < 0.5 meters"
    battery_depleted:
      value: -20.0
      condition: "battery_level == 0.0"
    # --- Dense shaping terms ------------------------------------------------
    potential_based_distance_shaping:
      # Formula string, not a number: progress toward the waypoint this step
      # as a fraction of the spawn distance.
      value: "(prev_dist - curr_dist) / initial_distance"
      condition: "every step while waypoint is active"
      note: >
        Φ(s) = −distance. Shaping = Φ(s') − Φ(s) = prev_dist − curr_dist.
        Normalised by initial_distance for spawn-distance independence.
        Standing still yields shaping = 0, so step penalty + drain = net negative.
    vector_field_obstacle_shaping:
      value: "up to +0.3"
      condition: "any obstacle within 10 metres"
      note: >
        Computes attractive (goal) + repulsive (obstacles) gradient blend,
        takes orthogonal tangent, rewards cosine similarity with rover heading.
        Scaled by proximity urgency (closer obstacle = stronger signal).
    # --- End-of-episode bonus -----------------------------------------------
    efficiency_bonus:
      value: +5.0
      condition: "episode completed in < 50% of max_steps"

# -----------------------------------------------------------------------------
# Episode grading
#
# One sub-section per task id (easy / medium / hard), each with the scoring
# formula and definitions of the terms it introduces, followed by the declared
# range of the final score.
# -----------------------------------------------------------------------------
grading:
  note: >
    Scoring is task-specific. The authoritative formula for each task is
    returned by the /tasks endpoint in the scoring_formula field, and
    enforced by the /grader endpoint.

  easy:
    # Weights sum to 1.0 (0.85 + 0.15).
    formula: "proximity*0.85 + step_efficiency*0.15"
    proximity:
      definition: "1.0 - (min_distance_achieved / initial_distance)"
      note: "Exactly 0.70 when the rover closed 70% of the gap. 1.0 on arrival."
    step_efficiency:
      definition: "1.0 - (steps_taken / max_steps)"

  medium:
    # proximity and step_efficiency are not redefined here; presumably they
    # reuse the definitions given under `easy` -- TODO confirm.
    # NOTE(review): the penalty term can push the formula below the declared
    # output floor of 0.0; presumably the grader clamps the result -- confirm.
    formula: "proximity*0.75 + step_efficiency*0.25 - min(collision_count*0.06, 0.40)"
    collision_penalty:
      per_collision: 0.06
      cap: 0.40

  hard:
    # proximity is not redefined here; presumably the `easy` definition
    # applies -- TODO confirm.
    formula: "proximity*0.65 + battery_efficiency*0.35"
    battery_efficiency:
      definition: "battery_remaining / starting_battery"
      note: "Normalised against 0.35 starting charge, not full capacity."

  # Declared type and range of the final score.
  # NOTE(review): nested under `grading` because no section banner precedes it
  # in the original layout -- confirm it is not meant to be a top-level key.
  output:
    type: "float32"
    low: 0.0
    high: 1.0