# =============================================================================
# OpenEnv Specification — Planetary Rover Navigation Simulator
# Meta PyTorch Hackathon — Round 1
# =============================================================================
---
name: planetary-rover-navigation
version: "1.0.0"
description: >
  A planetary surface navigation simulator in which a rover agent must
  traverse unknown terrain, manage battery reserves, avoid obstacles, and
  reach a sequence of target waypoints.
author: "Hackathon Team"
license: "MIT"

# ---------------------------------------------------------------------------
# Environment metadata
# ---------------------------------------------------------------------------
env:
  max_steps: 500  # hard episode cap before truncation
  step_dt: 1.0  # simulated seconds per step
  render_modes: ["none", "ascii", "rgb_array"]
  coordinate_system: "cartesian"  # right-hand, Z is up
  units:
    distance: "meters"
    angle: "radians"
    power: "watt-hours"
    velocity: "meters_per_second"

# ---------------------------------------------------------------------------
# Tasks
# ---------------------------------------------------------------------------
# Each task carries its own max_steps, which is lower than env.max_steps.
tasks:
  - id: "easy"
    display_name: "Flat Plains Transit"
    description: >
      Navigate flat, obstacle-free terrain to a single stationary waypoint.
      Battery drain is minimal. Graded purely on arrival accuracy and step
      efficiency.
    difficulty: 1
    max_steps: 200
    waypoints: 1
    terrain_profile: "flat"
    obstacle_density: 0.0
    # NOTE(review): this entry is annotated "% per step", whereas the medium
    # and hard entries describe a full-thrust drain with a multiplier and the
    # battery_drain_rate observation is a fraction of capacity per step.
    # Confirm the intended unit before comparing values across tasks.
    battery_drain_rate: 0.05  # % per step
    target_score: 1.0

  - id: "medium"
    display_name: "Crater Avoidance"
    description: >
      A static crater-rim obstacle ring bisects the direct path to the
      waypoint. Two perpendicular gaps allow passage on either side.
      Collisions subtract 0.06 from the score (capped at -0.40).
    difficulty: 2
    max_steps: 300
    waypoints: 1
    terrain_profile: "flat"
    # crater ring is placed deterministically, not randomly
    obstacle_density: 0.0
    battery_drain_rate: 0.01  # full-thrust drain × 1.0 multiplier
    target_score: 1.0

  - id: "hard"
    display_name: "Battery Sprint"
    description: >
      The rover starts with only 35% battery charge and drain is multiplied
      ×4. Any detour exhausts power before arrival. Compute the direct vector
      to the waypoint and commit to a straight-line full-thrust burn.
    difficulty: 3
    max_steps: 100
    waypoints: 1
    terrain_profile: "flat"
    obstacle_density: 0.0
    battery_drain_rate: 0.04  # full-thrust drain × 4.0 multiplier
    target_score: 1.0

# ---------------------------------------------------------------------------
# Observation Space
# ---------------------------------------------------------------------------
observation_space:
  type: "dict"
  description: >
    Full sensor readout returned by reset(), state(), and the 'obs' field of
    step(). All float values are normalised to [-1, 1] or [0, 1] unless noted
    as raw.
  fields:
    # --- Rover pose ---
    rover_position:
      type: "Box"
      shape: [3]
      dtype: "float32"
      low: [-500.0, -500.0, -50.0]
      high: [500.0, 500.0, 50.0]
      description: "[x, y, z] absolute position of rover centroid in meters (raw)"
    rover_heading:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [-3.14159]
      high: [3.14159]
      description: "Yaw angle in radians relative to +X axis (raw)"
    rover_velocity:
      type: "Box"
      shape: [3]
      dtype: "float32"
      low: [-5.0, -5.0, -2.0]
      high: [5.0, 5.0, 2.0]
      description: "[vx, vy, vz] velocity vector in m/s (raw)"

    # --- Target waypoint ---
    target_position:
      type: "Box"
      shape: [3]
      dtype: "float32"
      low: [-500.0, -500.0, -50.0]
      high: [500.0, 500.0, 50.0]
      description: "[x, y, z] absolute position of the current active waypoint (raw)"
    target_relative:
      type: "Box"
      shape: [3]
      dtype: "float32"
      low: [-1000.0, -1000.0, -100.0]
      high: [1000.0, 1000.0, 100.0]
      description: >
        [dx, dy, dz] vector from rover to active waypoint (raw meters).
        Use this for goal-conditioned policies.
    target_distance:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      # Upper bound is the length of the longest vector target_relative can
      # hold: sqrt(1000^2 + 1000^2 + 100^2) ≈ 1417.74 m. The previous value
      # (1414.0) was below even the planar diagonal sqrt(2) * 1000 ≈ 1414.21,
      # so a legal observation could fall outside the declared Box.
      high: [1417.75]
      description: "Euclidean distance to active waypoint in meters (raw)"
    waypoints_remaining:
      type: "Discrete"
      n: 4  # 0–3 (0 = episode complete)
      dtype: "int32"
      description: "Number of waypoints not yet visited in current episode"

    # --- Obstacle data ---
    obstacle_map:
      type: "Box"
      shape: [8, 3]
      dtype: "float32"
      # Scalar bounds apply to every element of the [8, 3] array.
      low: -1.0
      high: 1.0
      description: >
        Closest 8 obstacles, each encoded as [dx_norm, dy_norm, dist_norm].
        dx/dy are normalised to [-1, 1] relative to sensor range (50 m).
        dist_norm is [0, 1] where 0 = contact, 1 = at max sensor range.
        Rows are sorted by ascending distance. Padded with [0, 0, 1] when
        fewer than 8 obstacles are within sensor range.
    obstacle_count:
      type: "Discrete"
      n: 9  # 0–8 within sensor range
      dtype: "int32"
      description: "Number of distinct obstacles currently within sensor range"
    nearest_obstacle_distance:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [50.0]
      description: "Raw distance (meters) to the closest obstacle. 50.0 if none in range."

    # --- Battery ---
    battery_level:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [1.0]
      description: "Normalised remaining battery [0.0 = depleted, 1.0 = full]"
    battery_drain_rate:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [1.0]
      description: "Current drain rate as fraction of total capacity per step"

    # --- Terrain ---
    terrain_type:
      type: "Discrete"
      n: 4
      dtype: "int32"
      description: >
        Integer encoding of the terrain tile under the rover.
        0 = flat/sand, 1 = rocky, 2 = crater_floor, 3 = crater_rim
    terrain_slope:
      type: "Box"
      shape: [2]
      dtype: "float32"
      low: [-1.0, -1.0]
      high: [1.0, 1.0]
      description: >
        [slope_x, slope_y] surface normal projection components, normalised.
        [0, 0] = level surface.

    # --- Episode meta ---
    steps_taken:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [500.0]  # matches env.max_steps
      description: "Number of steps elapsed in the current episode (raw)"
    steps_remaining_norm:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [1.0]
      description: "Normalised remaining steps: (max_steps - steps_taken) / max_steps"

# ---------------------------------------------------------------------------
# Action Space
# ---------------------------------------------------------------------------
action_space:
  type: "dict"
  description: >
    Motor commands sent to the rover each step via step(action).
    All continuous values are clamped by the server to their declared bounds.
  fields:
    thrust:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [0.0]
      high: [1.0]
      description: >
        Forward drive intensity [0.0 = stopped, 1.0 = full throttle].
        Negative values are not valid; use brake to decelerate.
    steering:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [-1.0]
      high: [1.0]
      description: >
        Lateral steering command [-1.0 = hard left, 0.0 = straight,
        1.0 = hard right]. Interpreted as a yaw rate multiplied by current
        speed.
    brake:
      type: "Discrete"
      n: 2
      dtype: "int32"
      description: >
        Binary brake flag. 1 = apply regenerative braking (reduces speed,
        recovers 20 % of kinetic energy into battery). 0 = coast/drive.
    vertical_thruster:
      type: "Box"
      shape: [1]
      dtype: "float32"
      low: [-0.2]
      high: [0.2]
      description: >
        Small vertical adjustment thruster for crater terrain only
        [-0.2 = push down / anchor, 0.2 = assist over lip]. Has no effect and
        incurs no battery cost on flat/rocky terrain.

# ---------------------------------------------------------------------------
# Reward shaping (informational — enforced by /grader)
# ---------------------------------------------------------------------------
reward:
  description: >
    Step reward signal returned in the 'reward' field of step().
    The /grader endpoint computes the normalised episode score [0.0, 1.0]
    from the full trajectory. Reward shaping uses potential-based and
    vector-field techniques to prevent the "stationary exploit".
  components:
    waypoint_reached:
      value: +100.0
      condition: "target_distance < 2.0 meters"
      note: "Massive asymmetric reward prevents early policy collapse."
    step_penalty:
      value: -0.01
      condition: "every step"
    collision_penalty:
      value: -5.0
      condition: "nearest_obstacle_distance < 0.5 meters"
    battery_depleted:
      value: -20.0
      condition: "battery_level == 0.0"
    potential_based_distance_shaping:
      value: "(prev_dist - curr_dist) / initial_distance"
      condition: "every step while waypoint is active"
      note: >
        Φ(s) = −distance. Shaping = Φ(s') − Φ(s) = prev_dist − curr_dist.
        Normalised by initial_distance for spawn-distance independence.
        Standing still yields shaping = 0, so step penalty + drain = net
        negative.
    vector_field_obstacle_shaping:
      value: "up to +0.3"
      condition: "any obstacle within 10 metres"
      note: >
        Computes attractive (goal) + repulsive (obstacles) gradient blend,
        takes orthogonal tangent, rewards cosine similarity with rover
        heading. Scaled by proximity urgency (closer obstacle = stronger
        signal).
    efficiency_bonus:
      value: +5.0
      condition: "episode completed in < 50% of max_steps"

# ---------------------------------------------------------------------------
# Grading rubric (used by /grader endpoint)
# ---------------------------------------------------------------------------
grading:
  note: >
    Scoring is task-specific. The authoritative formula for each task is
    returned by the /tasks endpoint in the scoring_formula field, and
    enforced by the /grader endpoint.
  easy:
    formula: "proximity*0.85 + step_efficiency*0.15"
    proximity:
      definition: "1.0 - (min_distance_achieved / initial_distance)"
      note: "Exactly 0.70 when the rover closed 70% of the gap. 1.0 on arrival."
    step_efficiency:
      definition: "1.0 - (steps_taken / max_steps)"
  medium:
    # NOTE(review): with low proximity and several collisions this expression
    # can evaluate below output.low (0.0); presumably the grader clamps the
    # result — confirm against the /grader implementation.
    formula: "proximity*0.75 + step_efficiency*0.25 - min(collision_count*0.06, 0.40)"
    collision_penalty:
      per_collision: 0.06
      cap: 0.40
  hard:
    formula: "proximity*0.65 + battery_efficiency*0.35"
    battery_efficiency:
      definition: "battery_remaining / starting_battery"
      note: "Normalised against 0.35 starting charge, not full capacity."
  output:
    type: "float32"
    low: 0.0
    high: 1.0