Spaces:

sohambose98
/

mini-rl-env

Sleeping

App Files Community

sohambose98 committed on Apr 5

Commit

eaa79f0

1 Parent(s): 3ac18bb

updated the tests and graders

Browse files

Files changed (25) hide show

grid_env/Server/__pycache__/__init__.cpython-313.pyc +0 -0
grid_env/Server/__pycache__/app.cpython-313.pyc +0 -0
grid_env/Server/__pycache__/warehouse_env.cpython-313.pyc +0 -0
grid_env/__init__.py +16 -1
grid_env/__pycache__/__init__.cpython-313.pyc +0 -0
grid_env/__pycache__/baseline.cpython-313.pyc +0 -0
grid_env/__pycache__/client.cpython-313.pyc +0 -0
grid_env/__pycache__/env.cpython-313.pyc +0 -0
grid_env/__pycache__/graders.cpython-313.pyc +0 -0
grid_env/__pycache__/models.cpython-313.pyc +0 -0
grid_env/__pycache__/tasks.cpython-313.pyc +0 -0
grid_env/baseline.py +9 -1
grid_env/env.py +109 -6
grid_env/graders.py +42 -7
grid_env/models.py +25 -1
grid_env/openv.yaml +15 -0
grid_env/tasks.py +387 -0
grid_env/tools.py +1 -0
openenv.yaml +15 -0
tests/conftest.py +35 -0
tests/test_baseline_stub.py +11 -1
tests/test_env_smoke.py +14 -4
tests/test_graders.py +145 -9
tests/test_server.py +12 -2
tests/test_tasks.py +32 -5

grid_env/Server/__pycache__/__init__.cpython-313.pyc CHANGED Viewed

Binary files a/grid_env/Server/__pycache__/__init__.cpython-313.pyc and b/grid_env/Server/__pycache__/__init__.cpython-313.pyc differ

grid_env/Server/__pycache__/app.cpython-313.pyc CHANGED Viewed

Binary files a/grid_env/Server/__pycache__/app.cpython-313.pyc and b/grid_env/Server/__pycache__/app.cpython-313.pyc differ

grid_env/Server/__pycache__/warehouse_env.cpython-313.pyc CHANGED Viewed

Binary files a/grid_env/Server/__pycache__/warehouse_env.cpython-313.pyc and b/grid_env/Server/__pycache__/warehouse_env.cpython-313.pyc differ

grid_env/__init__.py CHANGED Viewed

@@ -1,6 +1,16 @@
 from .client import WarehouseEnvClient
 from .env import WarehouseFulfillmentEnv, available_tasks
-from .graders import grade_easy, grade_episode, grade_hard, grade_medium
 from .models import (
     BaselineCommand,
     BinState,
@@ -30,8 +40,13 @@ __all__ = [
     "WarehouseReward",
     "WarehouseState",
     "available_tasks",
     "grade_easy",
     "grade_episode",
     "grade_hard",
     "grade_medium",
 ]

 from .client import WarehouseEnvClient
 from .env import WarehouseFulfillmentEnv, available_tasks
+from .graders import (
+    grade_budget_run,
+    grade_easy,
+    grade_episode,
+    grade_gauntlet,
+    grade_hard,
+    grade_heavy_lifting,
+    grade_medium,
+    grade_obstacle_course,
+    grade_stamina_run,
+)
 from .models import (
     BaselineCommand,
     BinState,
     "WarehouseReward",
     "WarehouseState",
     "available_tasks",
+    "grade_budget_run",
     "grade_easy",
     "grade_episode",
+    "grade_gauntlet",
     "grade_hard",
+    "grade_heavy_lifting",
     "grade_medium",
+    "grade_obstacle_course",
+    "grade_stamina_run",
 ]

grid_env/__pycache__/__init__.cpython-313.pyc CHANGED Viewed

Binary files a/grid_env/__pycache__/__init__.cpython-313.pyc and b/grid_env/__pycache__/__init__.cpython-313.pyc differ

grid_env/__pycache__/baseline.cpython-313.pyc CHANGED Viewed

Binary files a/grid_env/__pycache__/baseline.cpython-313.pyc and b/grid_env/__pycache__/baseline.cpython-313.pyc differ

grid_env/__pycache__/client.cpython-313.pyc CHANGED Viewed

Binary files a/grid_env/__pycache__/client.cpython-313.pyc and b/grid_env/__pycache__/client.cpython-313.pyc differ

grid_env/__pycache__/env.cpython-313.pyc CHANGED Viewed

Binary files a/grid_env/__pycache__/env.cpython-313.pyc and b/grid_env/__pycache__/env.cpython-313.pyc differ

grid_env/__pycache__/graders.cpython-313.pyc CHANGED Viewed

Binary files a/grid_env/__pycache__/graders.cpython-313.pyc and b/grid_env/__pycache__/graders.cpython-313.pyc differ

grid_env/__pycache__/models.cpython-313.pyc CHANGED Viewed

Binary files a/grid_env/__pycache__/models.cpython-313.pyc and b/grid_env/__pycache__/models.cpython-313.pyc differ

grid_env/__pycache__/tasks.cpython-313.pyc CHANGED Viewed

Binary files a/grid_env/__pycache__/tasks.cpython-313.pyc and b/grid_env/__pycache__/tasks.cpython-313.pyc differ

grid_env/baseline.py CHANGED Viewed

@@ -21,7 +21,7 @@ except ImportError:
 SYSTEM_PROMPT = """You control a warehouse fulfillment robot.
 Return exactly one JSON object with:
-- command: one of turn_left, turn_right, move_forward, scan_bin, pick_item, pack_item, recharge, wait
 - rationale: a short sentence
 Objective:
@@ -29,6 +29,14 @@ Objective:
 - Use scans before picks when the task requires verified bins.
 - Recharge before battery depletion if needed.
 - Avoid invalid actions and unnecessary wandering.
 """

 SYSTEM_PROMPT = """You control a warehouse fulfillment robot.
 Return exactly one JSON object with:
+- command: one of turn_left, turn_right, move_forward, scan_bin, pick_item, pack_item, recharge, rest, wait
 - rationale: a short sentence
 Objective:
 - Use scans before picks when the task requires verified bins.
 - Recharge before battery depletion if needed.
 - Avoid invalid actions and unnecessary wandering.
+Advanced mechanics (active on harder tasks):
+- Obstacles: some cells are impassable. If front_cell says "obstacle", turn to find another route.
+- Item weight: items have weight. If an item exceeds your carry capacity, you cannot pick it.
+  Heavier items drain more battery while moving.
+- Stamina: movement costs stamina. When stamina hits 0, movement costs double battery.
+  Use the "rest" action at the rest area to restore stamina.
+- Money: packing correct items earns money; wrong packs lose money. Hit the profit target if set.
 """

grid_env/env.py CHANGED Viewed

@@ -40,6 +40,7 @@ class WarehouseFulfillmentEnv:
         "pick_item",
         "pack_item",
         "recharge",
         "wait",
     ]
@@ -100,6 +101,8 @@ class WarehouseFulfillmentEnv:
             reward_value, narrative = self._pack_item(reward_value)
         elif command == "recharge":
             reward_value, narrative = self._recharge(reward_value)
         elif command == "wait":
             reward_value -= 0.01
             self.metrics.invalid_actions += 1
@@ -110,8 +113,8 @@ class WarehouseFulfillmentEnv:
             narrative = f"Unknown action: {command}."
         self.action_history.append(command)
-        self.done = self._all_order_lines_complete() or self.step_count >= self.task.max_steps
-        self.success = self._all_order_lines_complete()
         if self.success:
             reward_value += 0.50
             narrative = "Order fully packed and ready for dispatch."
@@ -143,11 +146,17 @@ class WarehouseFulfillmentEnv:
             agent_position=self.agent_position,
             heading=self.heading,
             carrying=self.carrying,
             battery_level=self.battery_level,
             battery_capacity=self.task.battery_capacity,
             dock_position=self.task.dock_position,
             pack_station_position=self.task.pack_station_position,
             charger_position=self.task.charger_position,
             bins=[self._clone_bin(bin_state) for bin_state in self.bins],
             order=[self._clone_order_line(line) for line in self.order],
             packed_order=[self._clone_order_line(line) for line in self.packed_order],
@@ -166,6 +175,9 @@ class WarehouseFulfillmentEnv:
         self.heading = self.task.agent_heading
         self.battery_level = self.task.battery_capacity
         self.carrying: Optional[str] = None
         self.step_count = 0
         self.done = False
         self.success = False
@@ -206,6 +218,8 @@ class WarehouseFulfillmentEnv:
         front = self._front_position()
         if not self._in_bounds(front):
             return "wall"
         front_bin = self._front_bin()
         if front_bin:
             return f"bin {front_bin.bin_id} ({front_bin.sku})"
@@ -215,18 +229,35 @@ class WarehouseFulfillmentEnv:
             return "charger"
         if front == self.task.dock_position:
             return "dock"
         return "aisle"
     def _move_forward(self, reward: float) -> Tuple[float, str]:
         next_pos = self._front_position()
         if not self._in_bounds(next_pos) or self._occupied(next_pos):
             self.metrics.invalid_actions += 1
             self._consume_battery(1)
             return reward - 0.08, "Forward move blocked by warehouse infrastructure."
         self.agent_position = next_pos
         self.metrics.distance_travelled += 1
-        self._consume_battery(2)
         return reward, f"Moved to aisle cell {self.agent_position}."
     def _scan_bin(self, reward: float) -> Tuple[float, str]:
@@ -260,13 +291,22 @@ class WarehouseFulfillmentEnv:
             self.metrics.invalid_actions += 1
             self._consume_battery(1)
             return reward - 0.10, f"Bin {bin_state.bin_id} is empty."
         self._consume_battery(1)
         bin_state.quantity -= 1
         self.carrying = bin_state.sku
         if self._remaining_quantity(bin_state.sku) > 0:
             self.metrics.correct_picks += 1
-            return reward + 0.20, f"Picked {bin_state.sku} from {bin_state.bin_id}."
         self.metrics.wrong_picks += 1
         return reward - 0.18, f"Picked {bin_state.sku}, which is not needed now."
@@ -285,18 +325,28 @@ class WarehouseFulfillmentEnv:
         remaining = self._remaining_quantity(self.carrying)
         if remaining <= 0:
             item = self.carrying
             self.carrying = None
             self.metrics.wrong_picks += 1
             return reward - 0.15, f"Packed extra unit of {item}; order did not require it."
         for packed_line in self.packed_order:
             if packed_line.sku == self.carrying:
                 packed_line.quantity += 1
                 break
         item = self.carrying
         self.carrying = None
         self.metrics.correct_packs += 1
-        return reward + 0.35, f"Packed {item} at the station."
     def _recharge(self, reward: float) -> Tuple[float, str]:
         if self._front_position() != self.task.charger_position:
@@ -312,6 +362,24 @@ class WarehouseFulfillmentEnv:
         self.metrics.recharges += 1
         return reward + benefit, "Battery restored to full capacity."
     def _build_observation(self, narrative: str) -> WarehouseObservation:
         nearby_bins = []
         for bin_state in self.bins:
@@ -334,7 +402,10 @@ class WarehouseFulfillmentEnv:
             heading=self.heading,
             front_cell=self._front_cell_label(),
             carrying=self.carrying,
             battery_level=self.battery_level,
             visible_bins=nearby_bins,
             pending_order=pending,
             packed_order=packed,
@@ -363,11 +434,43 @@ class WarehouseFulfillmentEnv:
         if previous > 0 and self.battery_level == 0:
             self.metrics.battery_depletion_events += 1
     def _in_bounds(self, position: Tuple[int, int]) -> bool:
         return 0 <= position[0] < self.grid_size[0] and 0 <= position[1] < self.grid_size[1]
     def _occupied(self, position: Tuple[int, int]) -> bool:
-        if position in {self.task.pack_station_position, self.task.charger_position, self.task.dock_position}:
             return True
         return any(bin_state.position == position for bin_state in self.bins)

         "pick_item",
         "pack_item",
         "recharge",
+        "rest",
         "wait",
     ]
             reward_value, narrative = self._pack_item(reward_value)
         elif command == "recharge":
             reward_value, narrative = self._recharge(reward_value)
+        elif command == "rest":
+            reward_value, narrative = self._rest(reward_value)
         elif command == "wait":
             reward_value -= 0.01
             self.metrics.invalid_actions += 1
             narrative = f"Unknown action: {command}."
         self.action_history.append(command)
+        self.done = self._is_episode_complete() or self.step_count >= self.task.max_steps
+        self.success = self._is_episode_complete()
         if self.success:
             reward_value += 0.50
             narrative = "Order fully packed and ready for dispatch."
             agent_position=self.agent_position,
             heading=self.heading,
             carrying=self.carrying,
+            carrying_weight=self.carrying_weight,
             battery_level=self.battery_level,
             battery_capacity=self.task.battery_capacity,
+            stamina_level=self.stamina_level,
+            stamina_capacity=self.task.stamina_capacity,
+            money=round(self.money, 2),
+            profit_target=self.task.profit_target,
             dock_position=self.task.dock_position,
             pack_station_position=self.task.pack_station_position,
             charger_position=self.task.charger_position,
+            obstacles=list(self.task.obstacles),
             bins=[self._clone_bin(bin_state) for bin_state in self.bins],
             order=[self._clone_order_line(line) for line in self.order],
             packed_order=[self._clone_order_line(line) for line in self.packed_order],
         self.heading = self.task.agent_heading
         self.battery_level = self.task.battery_capacity
         self.carrying: Optional[str] = None
+        self.carrying_weight: int = 0
+        self.stamina_level: int = self.task.stamina_capacity
+        self.money: float = 0.0
         self.step_count = 0
         self.done = False
         self.success = False
         front = self._front_position()
         if not self._in_bounds(front):
             return "wall"
+        if self._is_obstacle(front):
+            return "obstacle"
         front_bin = self._front_bin()
         if front_bin:
             return f"bin {front_bin.bin_id} ({front_bin.sku})"
             return "charger"
         if front == self.task.dock_position:
             return "dock"
+        if self.task.rest_position and front == self.task.rest_position:
+            return "rest area"
         return "aisle"
     def _move_forward(self, reward: float) -> Tuple[float, str]:
         next_pos = self._front_position()
+        if self._is_obstacle(next_pos):
+            self.metrics.obstacle_collisions += 1
+            self.metrics.invalid_actions += 1
+            self._consume_battery(1)
+            return reward - 0.12, "Blocked by an obstacle! Find another route."
         if not self._in_bounds(next_pos) or self._occupied(next_pos):
             self.metrics.invalid_actions += 1
             self._consume_battery(1)
             return reward - 0.08, "Forward move blocked by warehouse infrastructure."
+        battery_cost = 2
+        weight_penalty = self.carrying_weight if self.carrying_weight > 1 else 0
+        battery_cost += weight_penalty
+        if self._has_stamina() and self.stamina_level <= 0:
+            battery_cost *= 2
         self.agent_position = next_pos
         self.metrics.distance_travelled += 1
+        self._consume_battery(battery_cost)
+        self._consume_stamina(self.task.stamina_move_cost)
         return reward, f"Moved to aisle cell {self.agent_position}."
     def _scan_bin(self, reward: float) -> Tuple[float, str]:
             self.metrics.invalid_actions += 1
             self._consume_battery(1)
             return reward - 0.10, f"Bin {bin_state.bin_id} is empty."
+        if bin_state.weight > self.task.carry_capacity:
+            self.metrics.overweight_attempts += 1
+            self.metrics.invalid_actions += 1
+            self._consume_battery(1)
+            return reward - 0.12, (
+                f"Item {bin_state.sku} weighs {bin_state.weight} but carry capacity "
+                f"is {self.task.carry_capacity}. Too heavy!"
+            )
         self._consume_battery(1)
         bin_state.quantity -= 1
         self.carrying = bin_state.sku
+        self.carrying_weight = bin_state.weight
         if self._remaining_quantity(bin_state.sku) > 0:
             self.metrics.correct_picks += 1
+            return reward + 0.20, f"Picked {bin_state.sku} (weight {bin_state.weight}) from {bin_state.bin_id}."
         self.metrics.wrong_picks += 1
         return reward - 0.18, f"Picked {bin_state.sku}, which is not needed now."
         remaining = self._remaining_quantity(self.carrying)
         if remaining <= 0:
             item = self.carrying
+            item_value = self._item_value(item)
             self.carrying = None
+            self.carrying_weight = 0
             self.metrics.wrong_picks += 1
+            if item_value > 0:
+                self.money -= item_value * 0.5
+                self.metrics.money_lost += item_value * 0.5
             return reward - 0.15, f"Packed extra unit of {item}; order did not require it."
+        item_value = self._item_value(self.carrying)
         for packed_line in self.packed_order:
             if packed_line.sku == self.carrying:
                 packed_line.quantity += 1
                 break
         item = self.carrying
         self.carrying = None
+        self.carrying_weight = 0
         self.metrics.correct_packs += 1
+        if item_value > 0:
+            self.money += item_value
+            self.metrics.money_earned += item_value
+        return reward + 0.35, f"Packed {item} at the station. (+${item_value:.2f})"
     def _recharge(self, reward: float) -> Tuple[float, str]:
         if self._front_position() != self.task.charger_position:
         self.metrics.recharges += 1
         return reward + benefit, "Battery restored to full capacity."
+    def _rest(self, reward: float) -> Tuple[float, str]:
+        if not self._has_stamina():
+            self.metrics.invalid_actions += 1
+            return reward - 0.03, "This task has no stamina mechanic."
+        if self.task.rest_position and self._front_position() != self.task.rest_position:
+            self.metrics.invalid_actions += 1
+            self._consume_battery(1)
+            return reward - 0.08, "Rest action requires facing the rest area."
+        if self.stamina_level >= self.task.stamina_capacity:
+            return reward - 0.03, "Stamina already full."
+        benefit = 0.06 if self.stamina_level <= self.task.stamina_capacity // 4 else -0.02
+        self.stamina_level = self.task.stamina_capacity
+        self.metrics.rest_events += 1
+        return reward + benefit, "Stamina restored to full capacity."
     def _build_observation(self, narrative: str) -> WarehouseObservation:
         nearby_bins = []
         for bin_state in self.bins:
             heading=self.heading,
             front_cell=self._front_cell_label(),
             carrying=self.carrying,
+            carrying_weight=self.carrying_weight,
             battery_level=self.battery_level,
+            stamina_level=self.stamina_level,
+            money=round(self.money, 2),
             visible_bins=nearby_bins,
             pending_order=pending,
             packed_order=packed,
         if previous > 0 and self.battery_level == 0:
             self.metrics.battery_depletion_events += 1
+    def _consume_stamina(self, amount: int) -> None:
+        if not self._has_stamina():
+            return
+        previous = self.stamina_level
+        self.stamina_level = max(0, self.stamina_level - amount)
+        if previous > 0 and self.stamina_level == 0:
+            self.metrics.stamina_depletion_events += 1
+    def _has_stamina(self) -> bool:
+        return self.task.stamina_capacity > 0
+    def _is_obstacle(self, position: Tuple[int, int]) -> bool:
+        return tuple(position) in {tuple(o) for o in self.task.obstacles}
+    def _item_value(self, sku: str) -> float:
+        for bin_state in self.task.bins:
+            if bin_state.sku == sku:
+                return bin_state.value
+        return 0.0
+    def _is_episode_complete(self) -> bool:
+        if not self._all_order_lines_complete():
+            return False
+        if self.task.profit_target > 0 and self.money < self.task.profit_target:
+            return False
+        return True
     def _in_bounds(self, position: Tuple[int, int]) -> bool:
         return 0 <= position[0] < self.grid_size[0] and 0 <= position[1] < self.grid_size[1]
     def _occupied(self, position: Tuple[int, int]) -> bool:
+        fixed = {self.task.pack_station_position, self.task.charger_position, self.task.dock_position}
+        if self.task.rest_position:
+            fixed.add(self.task.rest_position)
+        if position in fixed:
+            return True
+        if self._is_obstacle(position):
             return True
         return any(bin_state.position == position for bin_state in self.bins)

grid_env/graders.py CHANGED Viewed

@@ -224,6 +224,12 @@ def _build_action_log(state: WarehouseState) -> List[Dict[str, Any]]:
                 "recharges": metrics.recharges,
                 "battery_depletion_events": metrics.battery_depletion_events,
                 "distance_travelled": metrics.distance_travelled,
             },
             "result": "",
         }
@@ -255,11 +261,40 @@ def grade_hard(state: WarehouseState) -> float:
     return _grade_task("hard_restock_priority", state)
 def grade_episode(state: WarehouseState) -> float:
-    if state.task_id == "easy_single_pick":
-        return grade_easy(state)
-    if state.task_id == "medium_multi_item":
-        return grade_medium(state)
-    if state.task_id == "hard_restock_priority":
-        return grade_hard(state)
-    raise KeyError(f"No grader for task_id: {state.task_id}")

                 "recharges": metrics.recharges,
                 "battery_depletion_events": metrics.battery_depletion_events,
                 "distance_travelled": metrics.distance_travelled,
+                "stamina_depletion_events": metrics.stamina_depletion_events,
+                "rest_events": metrics.rest_events,
+                "obstacle_collisions": metrics.obstacle_collisions,
+                "money_earned": metrics.money_earned,
+                "money_lost": metrics.money_lost,
+                "overweight_attempts": metrics.overweight_attempts,
             },
             "result": "",
         }
     return _grade_task("hard_restock_priority", state)
+def grade_obstacle_course(state: WarehouseState) -> float:
+    return _grade_task("obstacle_course", state)
+def grade_heavy_lifting(state: WarehouseState) -> float:
+    return _grade_task("heavy_lifting", state)
+def grade_stamina_run(state: WarehouseState) -> float:
+    return _grade_task("stamina_run", state)
+def grade_budget_run(state: WarehouseState) -> float:
+    return _grade_task("budget_run", state)
+def grade_gauntlet(state: WarehouseState) -> float:
+    return _grade_task("gauntlet", state)
+_GRADER_DISPATCH = {
+    "easy_single_pick": grade_easy,
+    "medium_multi_item": grade_medium,
+    "hard_restock_priority": grade_hard,
+    "obstacle_course": grade_obstacle_course,
+    "heavy_lifting": grade_heavy_lifting,
+    "stamina_run": grade_stamina_run,
+    "budget_run": grade_budget_run,
+    "gauntlet": grade_gauntlet,
+}
 def grade_episode(state: WarehouseState) -> float:
+    grader = _GRADER_DISPATCH.get(state.task_id)
+    if grader is None:
+        raise KeyError(f"No grader for task_id: {state.task_id}")
+    return grader(state)

grid_env/models.py CHANGED Viewed

@@ -62,6 +62,7 @@ Command = Literal[
     "pick_item",
     "pack_item",
     "recharge",
     "wait",
 ]
@@ -84,11 +85,13 @@ class BinState(OpenEnvModel):
     position: Position
     sku: str
     quantity: int
 class TaskDefinition(OpenEnvModel):
     task_id: str
-    difficulty: Literal["easy", "medium", "hard"]
     title: str
     description: str
     max_steps: int
@@ -103,6 +106,12 @@ class TaskDefinition(OpenEnvModel):
     order: List[OrderLine]
     required_scans: List[str] = Field(default_factory=list)
     rubric_criteria: List[Dict[str, str]] = Field(default_factory=list)
 class PendingOrderLine(OpenEnvModel):
@@ -123,7 +132,10 @@ class WarehouseObservation(Observation, OpenEnvModel):
     heading: Heading
     front_cell: str
     carrying: Optional[str]
     battery_level: int
     visible_bins: List[str]
     pending_order: List[PendingOrderLine]
     packed_order: List[PackedOrderLine]
@@ -146,6 +158,12 @@ class WarehouseMetrics(OpenEnvModel):
     recharges: int = 0
     battery_depletion_events: int = 0
     distance_travelled: int = 0
 class WarehouseState(State, OpenEnvModel):
@@ -160,11 +178,17 @@ class WarehouseState(State, OpenEnvModel):
     agent_position: Position
     heading: Heading
     carrying: Optional[str]
     battery_level: int
     battery_capacity: int
     dock_position: Position
     pack_station_position: Position
     charger_position: Position
     bins: List[BinState]
     order: List[OrderLine]
     packed_order: List[OrderLine]

     "pick_item",
     "pack_item",
     "recharge",
+    "rest",
     "wait",
 ]
     position: Position
     sku: str
     quantity: int
+    weight: int = 1
+    value: float = 0.0
 class TaskDefinition(OpenEnvModel):
     task_id: str
+    difficulty: Literal["easy", "medium", "hard", "expert"]
     title: str
     description: str
     max_steps: int
     order: List[OrderLine]
     required_scans: List[str] = Field(default_factory=list)
     rubric_criteria: List[Dict[str, str]] = Field(default_factory=list)
+    obstacles: List[Position] = Field(default_factory=list)
+    carry_capacity: int = 99
+    stamina_capacity: int = 0
+    stamina_move_cost: int = 1
+    rest_position: Optional[Position] = None
+    profit_target: float = 0.0
 class PendingOrderLine(OpenEnvModel):
     heading: Heading
     front_cell: str
     carrying: Optional[str]
+    carrying_weight: int = 0
     battery_level: int
+    stamina_level: int = 0
+    money: float = 0.0
     visible_bins: List[str]
     pending_order: List[PendingOrderLine]
     packed_order: List[PackedOrderLine]
     recharges: int = 0
     battery_depletion_events: int = 0
     distance_travelled: int = 0
+    stamina_depletion_events: int = 0
+    rest_events: int = 0
+    obstacle_collisions: int = 0
+    money_earned: float = 0.0
+    money_lost: float = 0.0
+    overweight_attempts: int = 0
 class WarehouseState(State, OpenEnvModel):
     agent_position: Position
     heading: Heading
     carrying: Optional[str]
+    carrying_weight: int = 0
     battery_level: int
     battery_capacity: int
+    stamina_level: int = 0
+    stamina_capacity: int = 0
+    money: float = 0.0
+    profit_target: float = 0.0
     dock_position: Position
     pack_station_position: Position
     charger_position: Position
+    obstacles: List[Position] = Field(default_factory=list)
     bins: List[BinState]
     order: List[OrderLine]
     packed_order: List[OrderLine]

grid_env/openv.yaml CHANGED Viewed

@@ -28,6 +28,21 @@ tasks:
   - id: hard_restock_priority
     difficulty: hard
     grader: grid_env.graders:grade_hard
 baseline:
   runner: grid_env.baseline:run_baseline
   seed: 7

   - id: hard_restock_priority
     difficulty: hard
     grader: grid_env.graders:grade_hard
+  - id: obstacle_course
+    difficulty: medium
+    grader: grid_env.graders:grade_obstacle_course
+  - id: heavy_lifting
+    difficulty: hard
+    grader: grid_env.graders:grade_heavy_lifting
+  - id: stamina_run
+    difficulty: hard
+    grader: grid_env.graders:grade_stamina_run
+  - id: budget_run
+    difficulty: expert
+    grader: grid_env.graders:grade_budget_run
+  - id: gauntlet
+    difficulty: expert
+    grader: grid_env.graders:grade_gauntlet
 baseline:
   runner: grid_env.baseline:run_baseline
   seed: 7

grid_env/tasks.py CHANGED Viewed

@@ -182,6 +182,393 @@ TASKS: Dict[str, TaskDefinition] = {
             },
         ],
     ),
 }

             },
         ],
     ),
+    # -----------------------------------------------------------------------
+    # Task 4: obstacle_course (medium) — obstacles block direct paths
+    # -----------------------------------------------------------------------
+    "obstacle_course": TaskDefinition(
+        task_id="obstacle_course",
+        difficulty="medium",
+        title="Obstacle-filled aisle navigation",
+        description=(
+            "Fulfill a two-item order in a warehouse cluttered with fallen crates. "
+            "Navigate around obstacles to reach bins, scan them, pick one thermometer "
+            "and one bandage kit, then pack both at the station."
+        ),
+        max_steps=70,
+        battery_capacity=40,
+        low_battery_threshold=10,
+        agent_start=(0, 0),
+        agent_heading="E",
+        dock_position=(0, 0),
+        pack_station_position=(6, 6),
+        charger_position=(0, 6),
+        bins=[
+            BinState(bin_id="A1", position=(2, 1), sku="thermometer", quantity=2),
+            BinState(bin_id="B1", position=(4, 1), sku="bandage_kit", quantity=2),
+            BinState(bin_id="A2", position=(2, 3), sku="cough_syrup", quantity=2),
+            BinState(bin_id="B2", position=(4, 3), sku="pain_relief", quantity=2),
+            BinState(bin_id="C1", position=(2, 5), sku="gloves", quantity=2),
+        ],
+        order=[
+            OrderLine(sku="thermometer", quantity=1),
+            OrderLine(sku="bandage_kit", quantity=1),
+        ],
+        required_scans=["A1", "B1"],
+        obstacles=[(1, 2), (2, 2), (3, 2), (3, 4), (4, 4), (5, 4)],
+        rubric_criteria=[
+            {
+                "name": "completion",
+                "description": "All items packed.",
+                "check": "param_at_least:state.completion_ratio=1.0",
+            },
+            {
+                "name": "scans",
+                "description": "Scanned both required bins.",
+                "check": "param_at_least:state.correct_scans=2",
+            },
+            {
+                "name": "pack_item",
+                "description": "Packed items at the station.",
+                "check": "tool_used:pack_item",
+            },
+            {
+                "name": "no_obstacle_collisions",
+                "description": "Avoided all obstacle collisions.",
+                "check": "param_at_most:state.obstacle_collisions=0",
+            },
+            {
+                "name": "no_wrong_picks",
+                "description": "No incorrect picks.",
+                "check": "param_at_most:state.wrong_picks=0",
+            },
+            {
+                "name": "few_invalid_actions",
+                "description": "At most two invalid actions.",
+                "check": "param_at_most:state.invalid_actions=2",
+            },
+        ],
+    ),
+    # -----------------------------------------------------------------------
+    # Task 5: heavy_lifting (hard) — items have weight, limited carry capacity
+    # -----------------------------------------------------------------------
+    "heavy_lifting": TaskDefinition(
+        task_id="heavy_lifting",
+        difficulty="hard",
+        title="Heavy-item logistics with weight limits",
+        description=(
+            "Fulfill a three-item order where items vary in weight (1-4 units). "
+            "The agent has a carry capacity of 3 and must choose pickup order wisely. "
+            "Heavier items drain more battery while moving. Scan each bin, pick items "
+            "that fit within your carry limit, and pack at the station. "
+            "The heavy pain_relief (weight 4) cannot be carried — skip it!"
+        ),
+        max_steps=90,
+        battery_capacity=32,
+        low_battery_threshold=8,
+        agent_start=(1, 1),
+        agent_heading="E",
+        dock_position=(1, 1),
+        pack_station_position=(5, 5),
+        charger_position=(1, 5),
+        bins=[
+            BinState(bin_id="A1", position=(2, 1), sku="thermometer", quantity=3, weight=1),
+            BinState(bin_id="A2", position=(2, 3), sku="cough_syrup", quantity=2, weight=2),
+            BinState(bin_id="B1", position=(4, 1), sku="bandage_kit", quantity=2, weight=3),
+            BinState(bin_id="B2", position=(4, 3), sku="pain_relief", quantity=2, weight=4),
+            BinState(bin_id="C1", position=(2, 5), sku="gloves", quantity=3, weight=1),
+        ],
+        order=[
+            OrderLine(sku="thermometer", quantity=1),
+            OrderLine(sku="cough_syrup", quantity=1),
+            OrderLine(sku="bandage_kit", quantity=1),
+        ],
+        required_scans=["A1", "A2", "B1"],
+        carry_capacity=3,
+        rubric_criteria=[
+            {
+                "name": "completion",
+                "description": "All items packed.",
+                "check": "param_at_least:state.completion_ratio=1.0",
+            },
+            {
+                "name": "scans",
+                "description": "Scanned all three required bins.",
+                "check": "param_at_least:state.correct_scans=3",
+            },
+            {
+                "name": "recharge",
+                "description": "Recharged at least once.",
+                "check": "tool_used:recharge",
+            },
+            {
+                "name": "no_overweight",
+                "description": "Never tried to pick an overweight item.",
+                "check": "param_at_most:state.overweight_attempts=0",
+            },
+            {
+                "name": "no_battery_depletion",
+                "description": "Avoided battery depletion.",
+                "check": "param_at_most:state.battery_depletion_events=0",
+            },
+            {
+                "name": "no_wrong_picks",
+                "description": "No incorrect picks.",
+                "check": "param_at_most:state.wrong_picks=0",
+            },
+            {
+                "name": "few_invalid_actions",
+                "description": "At most two invalid actions.",
+                "check": "param_at_most:state.invalid_actions=2",
+            },
+        ],
+    ),
+    # -----------------------------------------------------------------------
+    # Task 6: stamina_run (hard) — stamina drains on movement
+    # -----------------------------------------------------------------------
+    "stamina_run": TaskDefinition(
+        task_id="stamina_run",
+        difficulty="hard",
+        title="Endurance run with stamina management",
+        description=(
+            "Fulfill a two-item order while managing stamina. Every move drains stamina; "
+            "when stamina hits zero, movement costs double battery. Rest at the rest area "
+            "to restore stamina. Pick one cough syrup and one gloves unit, scan bins, and "
+            "pack at the station without running out of energy."
+        ),
+        max_steps=80,
+        battery_capacity=36,
+        low_battery_threshold=8,
+        agent_start=(0, 0),
+        agent_heading="E",
+        dock_position=(0, 0),
+        pack_station_position=(6, 6),
+        charger_position=(0, 6),
+        rest_position=(3, 3),
+        stamina_capacity=12,
+        stamina_move_cost=1,
+        bins=[
+            BinState(bin_id="A1", position=(2, 1), sku="thermometer", quantity=2),
+            BinState(bin_id="A2", position=(2, 3), sku="cough_syrup", quantity=2),
+            BinState(bin_id="B1", position=(4, 1), sku="bandage_kit", quantity=2),
+            BinState(bin_id="B2", position=(4, 3), sku="pain_relief", quantity=2),
+            BinState(bin_id="C1", position=(2, 5), sku="gloves", quantity=2),
+        ],
+        order=[
+            OrderLine(sku="cough_syrup", quantity=1),
+            OrderLine(sku="gloves", quantity=1),
+        ],
+        required_scans=["A2", "C1"],
+        rubric_criteria=[
+            {
+                "name": "completion",
+                "description": "All items packed.",
+                "check": "param_at_least:state.completion_ratio=1.0",
+            },
+            {
+                "name": "scans",
+                "description": "Scanned both required bins.",
+                "check": "param_at_least:state.correct_scans=2",
+            },
+            {
+                "name": "rest_used",
+                "description": "Used the rest area at least once.",
+                "check": "tool_used:rest",
+            },
+            {
+                "name": "no_stamina_depletion",
+                "description": "Avoided complete stamina depletion.",
+                "check": "param_at_most:state.stamina_depletion_events=0",
+            },
+            {
+                "name": "no_battery_depletion",
+                "description": "Avoided battery depletion.",
+                "check": "param_at_most:state.battery_depletion_events=0",
+            },
+            {
+                "name": "no_wrong_picks",
+                "description": "No incorrect picks.",
+                "check": "param_at_most:state.wrong_picks=0",
+            },
+        ],
+    ),
+    # -----------------------------------------------------------------------
+    # Task 7: budget_run (expert) — money rewards and profit target
+    # -----------------------------------------------------------------------
+    "budget_run": TaskDefinition(
+        task_id="budget_run",
+        difficulty="expert",
+        title="Profitable fulfillment under budget pressure",
+        description=(
+            "Fulfill orders for profit. Each item has a dollar value earned when correctly "
+            "packed. Wrong packs lose half the item value. You must reach a profit target "
+            "of $15.00 while completing the order. Pick high-value items efficiently: "
+            "2 thermometers ($5 each) and 1 bandage kit ($8). Budget-aware decisions matter."
+        ),
+        max_steps=70,
+        battery_capacity=30,
+        low_battery_threshold=6,
+        agent_start=(1, 1),
+        agent_heading="E",
+        dock_position=(1, 1),
+        pack_station_position=(5, 5),
+        charger_position=(1, 5),
+        bins=[
+            BinState(bin_id="A1", position=(2, 1), sku="thermometer", quantity=3, value=5.0),
+            BinState(bin_id="A2", position=(2, 3), sku="cough_syrup", quantity=2, value=3.0),
+            BinState(bin_id="B1", position=(4, 1), sku="bandage_kit", quantity=2, value=8.0),
+            BinState(bin_id="B2", position=(4, 3), sku="pain_relief", quantity=2, value=4.0),
+            BinState(bin_id="C1", position=(2, 5), sku="gloves", quantity=2, value=2.0),
+        ],
+        order=[
+            OrderLine(sku="thermometer", quantity=2),
+            OrderLine(sku="bandage_kit", quantity=1),
+        ],
+        required_scans=["A1", "B1"],
+        profit_target=15.0,
+        rubric_criteria=[
+            {
+                "name": "completion",
+                "description": "All items packed.",
+                "check": "param_at_least:state.completion_ratio=1.0",
+            },
+            {
+                "name": "profit_target",
+                "description": "Reached the profit target of $15.",
+                "check": "param_at_least:state.money_earned=15.0",
+            },
+            {
+                "name": "scans",
+                "description": "Scanned required bins.",
+                "check": "param_at_least:state.correct_scans=2",
+            },
+            {
+                "name": "recharge",
+                "description": "Recharged at least once.",
+                "check": "tool_used:recharge",
+            },
+            {
+                "name": "no_money_lost",
+                "description": "No money lost from wrong packs.",
+                "check": "param_at_most:state.money_lost=0.0",
+            },
+            {
+                "name": "no_wrong_picks",
+                "description": "No incorrect picks.",
+                "check": "param_at_most:state.wrong_picks=0",
+            },
+            {
+                "name": "few_invalid_actions",
+                "description": "At most one invalid action.",
+                "check": "param_at_most:state.invalid_actions=1",
+            },
+        ],
+    ),
+    # -----------------------------------------------------------------------
+    # Task 8: gauntlet (expert) — all mechanics combined
+    # -----------------------------------------------------------------------
+    "gauntlet": TaskDefinition(
+        task_id="gauntlet",
+        difficulty="expert",
+        title="The gauntlet: obstacles, weight, stamina, and profit",
+        description=(
+            "The ultimate warehouse challenge. Navigate a cluttered warehouse with obstacles, "
+            "manage item weights (carry capacity 3), conserve stamina (rest when needed), "
+            "earn money for packed items, and hit a $20 profit target. Fulfill a four-item "
+            "order: 1 thermometer ($5, wt 1), 1 cough syrup ($6, wt 2), 1 bandage kit ($8, wt 3), "
+            "and 1 gloves ($4, wt 1). Recharge battery, rest for stamina, avoid obstacles, "
+            "and finish profitable."
+        ),
+        max_steps=120,
+        battery_capacity=28,
+        low_battery_threshold=7,
+        agent_start=(0, 0),
+        agent_heading="S",
+        dock_position=(0, 0),
+        pack_station_position=(6, 6),
+        charger_position=(6, 0),
+        rest_position=(0, 6),
+        stamina_capacity=10,
+        stamina_move_cost=1,
+        carry_capacity=3,
+        profit_target=20.0,
+        obstacles=[(1, 1), (3, 1), (5, 3), (3, 3), (1, 5), (5, 5)],
+        bins=[
+            BinState(bin_id="A1", position=(2, 0), sku="thermometer", quantity=3, weight=1, value=5.0),
+            BinState(bin_id="A2", position=(2, 4), sku="cough_syrup", quantity=2, weight=2, value=6.0),
+            BinState(bin_id="B1", position=(4, 0), sku="bandage_kit", quantity=2, weight=3, value=8.0),
+            BinState(bin_id="B2", position=(4, 4), sku="pain_relief", quantity=2, weight=4, value=4.0),
+            BinState(bin_id="C1", position=(4, 2), sku="gloves", quantity=3, weight=1, value=4.0),
+        ],
+        order=[
+            OrderLine(sku="thermometer", quantity=1),
+            OrderLine(sku="cough_syrup", quantity=1),
+            OrderLine(sku="bandage_kit", quantity=1),
+            OrderLine(sku="gloves", quantity=1),
+        ],
+        required_scans=["A1", "A2", "B1", "C1"],
+        rubric_criteria=[
+            {
+                "name": "completion",
+                "description": "All four items packed.",
+                "check": "param_at_least:state.completion_ratio=1.0",
+            },
+            {
+                "name": "profit_target",
+                "description": "Reached $20 profit target.",
+                "check": "param_at_least:state.money_earned=20.0",
+            },
+            {
+                "name": "scans",
+                "description": "Scanned all four required bins.",
+                "check": "param_at_least:state.correct_scans=4",
+            },
+            {
+                "name": "recharge",
+                "description": "Recharged at least once.",
+                "check": "tool_used:recharge",
+            },
+            {
+                "name": "rest_used",
+                "description": "Used the rest area at least once.",
+                "check": "tool_used:rest",
+            },
+            {
+                "name": "no_obstacle_collisions",
+                "description": "Avoided all obstacle collisions.",
+                "check": "param_at_most:state.obstacle_collisions=0",
+            },
+            {
+                "name": "no_overweight",
+                "description": "Never tried to pick an overweight item.",
+                "check": "param_at_most:state.overweight_attempts=0",
+            },
+            {
+                "name": "no_battery_depletion",
+                "description": "Avoided battery depletion.",
+                "check": "param_at_most:state.battery_depletion_events=0",
+            },
+            {
+                "name": "no_stamina_depletion",
+                "description": "Avoided stamina depletion.",
+                "check": "param_at_most:state.stamina_depletion_events=0",
+            },
+            {
+                "name": "no_wrong_picks",
+                "description": "No incorrect picks.",
+                "check": "param_at_most:state.wrong_picks=0",
+            },
+            {
+                "name": "few_invalid_actions",
+                "description": "At most two invalid actions.",
+                "check": "param_at_most:state.invalid_actions=2",
+            },
+        ],
+    ),
 }

grid_env/tools.py CHANGED Viewed

@@ -18,6 +18,7 @@ _TOOL_DESCRIPTIONS: Dict[str, str] = {
     "pick_item": "Pick an item from the bin in front.",
     "pack_item": "Pack the carried item at the packing station.",
     "recharge": "Recharge the battery at the charging dock.",
     "wait": "Stay in place and consume time.",
 }

     "pick_item": "Pick an item from the bin in front.",
     "pack_item": "Pack the carried item at the packing station.",
     "recharge": "Recharge the battery at the charging dock.",
+    "rest": "Rest at the rest area to restore stamina.",
     "wait": "Stay in place and consume time.",
 }

openenv.yaml CHANGED Viewed

@@ -28,6 +28,21 @@ tasks:
   - id: hard_restock_priority
     difficulty: hard
     grader: grid_env.graders:grade_hard
 baseline:
   runner: grid_env.baseline:run_baseline
   seed: 7

   - id: hard_restock_priority
     difficulty: hard
     grader: grid_env.graders:grade_hard
+  - id: obstacle_course
+    difficulty: medium
+    grader: grid_env.graders:grade_obstacle_course
+  - id: heavy_lifting
+    difficulty: hard
+    grader: grid_env.graders:grade_heavy_lifting
+  - id: stamina_run
+    difficulty: hard
+    grader: grid_env.graders:grade_stamina_run
+  - id: budget_run
+    difficulty: expert
+    grader: grid_env.graders:grade_budget_run
+  - id: gauntlet
+    difficulty: expert
+    grader: grid_env.graders:grade_gauntlet
 baseline:
   runner: grid_env.baseline:run_baseline
   seed: 7

tests/conftest.py CHANGED Viewed

@@ -25,3 +25,38 @@ def env_hard():
     env = WarehouseFulfillmentEnv(task_id="hard_restock_priority", seed=7)
     env.reset()
     return env

     env = WarehouseFulfillmentEnv(task_id="hard_restock_priority", seed=7)
     env.reset()
     return env
+@pytest.fixture()
+def env_obstacle_course():
+    env = WarehouseFulfillmentEnv(task_id="obstacle_course", seed=7)
+    env.reset()
+    return env
+@pytest.fixture()
+def env_heavy_lifting():
+    env = WarehouseFulfillmentEnv(task_id="heavy_lifting", seed=7)
+    env.reset()
+    return env
+@pytest.fixture()
+def env_stamina_run():
+    env = WarehouseFulfillmentEnv(task_id="stamina_run", seed=7)
+    env.reset()
+    return env
+@pytest.fixture()
+def env_budget_run():
+    env = WarehouseFulfillmentEnv(task_id="budget_run", seed=7)
+    env.reset()
+    return env
+@pytest.fixture()
+def env_gauntlet():
+    env = WarehouseFulfillmentEnv(task_id="gauntlet", seed=7)
+    env.reset()
+    return env

tests/test_baseline_stub.py CHANGED Viewed

@@ -14,7 +14,16 @@ from grid_env.graders import grade_episode
 from grid_env.models import BaselineCommand
-TASK_IDS = ["easy_single_pick", "medium_multi_item", "hard_restock_priority"]
 # Cycle of deterministic actions that exercise most code paths without getting stuck.
 _ACTION_CYCLE = [
@@ -27,6 +36,7 @@ _ACTION_CYCLE = [
     "turn_right",
     "move_forward",
     "pack_item",
     "wait",
 ]

 from grid_env.models import BaselineCommand
+TASK_IDS = [  # task ids covered by the baseline stub tests (their use sits outside this hunk — confirm)
+    "easy_single_pick",
+    "medium_multi_item",
+    "hard_restock_priority",
+    "obstacle_course",
+    "heavy_lifting",
+    "stamina_run",
+    "budget_run",
+    "gauntlet",
+]
 # Cycle of deterministic actions that exercise most code paths without getting stuck.
 _ACTION_CYCLE = [
     "turn_right",
     "move_forward",
     "pack_item",
+    "rest",
     "wait",
 ]

tests/test_env_smoke.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
 Smoke tests: environment instantiation, reset, step, and episode termination
-for all three task IDs.
 """
 import pytest
@@ -9,7 +9,16 @@ from grid_env.graders import grade_episode
 from grid_env.models import WarehouseObservation, WarehouseReward
-TASK_IDS = ["easy_single_pick", "medium_multi_item", "hard_restock_priority"]
 ALL_ACTIONS = [
     "turn_left",
     "turn_right",
@@ -18,6 +27,7 @@ ALL_ACTIONS = [
     "pick_item",
     "pack_item",
     "recharge",
     "wait",
 ]
@@ -104,8 +114,8 @@ def test_step_after_done_is_safe():
     assert done
-def test_available_tasks_returns_all_three():
-    """available_tasks() returns exactly the three expected task IDs."""
     tasks = available_tasks()
     ids = {t["task_id"] for t in tasks}
     assert ids == set(TASK_IDS)

 """
 Smoke tests: environment instantiation, reset, step, and episode termination
+for all task IDs.
 """
 import pytest
 from grid_env.models import WarehouseObservation, WarehouseReward
+TASK_IDS = [  # must match exactly the id set that available_tasks() reports
+    "easy_single_pick",
+    "medium_multi_item",
+    "hard_restock_priority",
+    "obstacle_course",
+    "heavy_lifting",
+    "stamina_run",
+    "budget_run",
+    "gauntlet",
+]
 ALL_ACTIONS = [
     "turn_left",
     "turn_right",
     "pick_item",
     "pack_item",
     "recharge",
+    "rest",
     "wait",
 ]
     assert done
+def test_available_tasks_returns_all():
+    """available_tasks() returns all expected task IDs."""
     tasks = available_tasks()
     ids = {t["task_id"] for t in tasks}
     assert ids == set(TASK_IDS)

tests/test_graders.py CHANGED Viewed

@@ -4,7 +4,17 @@ Unit tests for rubric-based graders.
 import pytest
-from grid_env.graders import grade_easy, grade_episode, grade_hard, grade_medium
 from grid_env.models import BinState, OrderLine, WarehouseMetrics, WarehouseState
@@ -21,6 +31,12 @@ def _make_state(
     invalid_actions: int = 0,
     recharges: int = 0,
     battery_depletion_events: int = 0,
     action_history: list[str] | None = None,
 ) -> WarehouseState:
     metrics = WarehouseMetrics(
@@ -32,6 +48,12 @@ def _make_state(
         invalid_actions=invalid_actions,
         recharges=recharges,
         battery_depletion_events=battery_depletion_events,
     )
     return WarehouseState(
         episode_id="test-ep",
@@ -138,14 +160,128 @@ def test_grade_hard_partial_rubric_scores_lower():
     assert 0.0 < grade_hard(state) < 1.0
-@pytest.mark.parametrize(
-    "task_id,grader",
-    [
-        ("easy_single_pick", grade_easy),
-        ("medium_multi_item", grade_medium),
-        ("hard_restock_priority", grade_hard),
-    ],
-)
 def test_grade_episode_dispatches_correctly(task_id, grader):
     state = _make_state(task_id, completion_ratio=0.5, action_history=["pick_item"])
     assert grade_episode(state) == grader(state)

 import pytest
+from grid_env.graders import (
+    grade_budget_run,
+    grade_easy,
+    grade_episode,
+    grade_gauntlet,
+    grade_hard,
+    grade_heavy_lifting,
+    grade_medium,
+    grade_obstacle_course,
+    grade_stamina_run,
+)
 from grid_env.models import BinState, OrderLine, WarehouseMetrics, WarehouseState
     invalid_actions: int = 0,
     recharges: int = 0,
     battery_depletion_events: int = 0,
+    stamina_depletion_events: int = 0,
+    rest_events: int = 0,
+    obstacle_collisions: int = 0,
+    money_earned: float = 0.0,
+    money_lost: float = 0.0,
+    overweight_attempts: int = 0,
     action_history: list[str] | None = None,
 ) -> WarehouseState:
     metrics = WarehouseMetrics(
         invalid_actions=invalid_actions,
         recharges=recharges,
         battery_depletion_events=battery_depletion_events,
+        stamina_depletion_events=stamina_depletion_events,
+        rest_events=rest_events,
+        obstacle_collisions=obstacle_collisions,
+        money_earned=money_earned,
+        money_lost=money_lost,
+        overweight_attempts=overweight_attempts,
     )
     return WarehouseState(
         episode_id="test-ep",
     assert 0.0 < grade_hard(state) < 1.0
+def test_grade_obstacle_course_full_rubric_passes():
+    state = _make_state(
+        "obstacle_course",
+        completion_ratio=1.0,
+        correct_scans=2,
+        wrong_picks=0,
+        invalid_actions=2,
+        obstacle_collisions=0,
+        action_history=["scan_bin", "pick_item", "pack_item"],
+    )
+    assert grade_obstacle_course(state) == pytest.approx(1.0)
+def test_grade_obstacle_course_collision_lowers_score():
+    state = _make_state(
+        "obstacle_course",
+        completion_ratio=1.0,
+        correct_scans=2,
+        wrong_picks=0,
+        invalid_actions=2,
+        obstacle_collisions=3,
+        action_history=["scan_bin", "pick_item", "pack_item"],
+    )
+    assert 0.0 < grade_obstacle_course(state) < 1.0
+def test_grade_heavy_lifting_full_rubric_passes():
+    state = _make_state(
+        "heavy_lifting",
+        completion_ratio=1.0,
+        correct_scans=3,
+        wrong_picks=0,
+        invalid_actions=2,
+        recharges=1,
+        battery_depletion_events=0,
+        overweight_attempts=0,
+        action_history=["scan_bin", "pick_item", "pack_item", "recharge"],
+    )
+    assert grade_heavy_lifting(state) == pytest.approx(1.0)
+def test_grade_stamina_run_full_rubric_passes():
+    state = _make_state(
+        "stamina_run",
+        completion_ratio=1.0,
+        correct_scans=2,
+        wrong_picks=0,
+        stamina_depletion_events=0,
+        battery_depletion_events=0,
+        rest_events=1,
+        action_history=["scan_bin", "pick_item", "pack_item", "rest"],
+    )
+    assert grade_stamina_run(state) == pytest.approx(1.0)
+def test_grade_budget_run_full_rubric_passes():
+    state = _make_state(
+        "budget_run",
+        completion_ratio=1.0,
+        correct_scans=2,
+        wrong_picks=0,
+        invalid_actions=1,
+        recharges=1,
+        money_earned=18.0,
+        money_lost=0.0,
+        action_history=["scan_bin", "pick_item", "pack_item", "recharge"],
+    )
+    assert grade_budget_run(state) == pytest.approx(1.0)
+def test_grade_gauntlet_full_rubric_passes():
+    state = _make_state(
+        "gauntlet",
+        completion_ratio=1.0,
+        correct_scans=4,
+        wrong_picks=0,
+        invalid_actions=2,
+        recharges=1,
+        battery_depletion_events=0,
+        stamina_depletion_events=0,
+        rest_events=1,
+        obstacle_collisions=0,
+        overweight_attempts=0,
+        money_earned=23.0,
+        money_lost=0.0,
+        action_history=["scan_bin", "pick_item", "pack_item", "recharge", "rest"],
+    )
+    assert grade_gauntlet(state) == pytest.approx(1.0)
+def test_grade_gauntlet_partial_scores_lower():
+    state = _make_state(
+        "gauntlet",
+        completion_ratio=1.0,
+        correct_scans=2,
+        wrong_picks=0,
+        invalid_actions=5,
+        recharges=0,
+        battery_depletion_events=1,
+        stamina_depletion_events=1,
+        obstacle_collisions=2,
+        overweight_attempts=1,
+        money_earned=10.0,
+        money_lost=5.0,
+        action_history=["scan_bin", "pick_item", "pack_item"],
+    )
+    assert 0.0 < grade_gauntlet(state) < 1.0
+ALL_GRADERS = [  # (task_id, grader) pairs; grade_episode must return the same score as the paired grader
+    ("easy_single_pick", grade_easy),
+    ("medium_multi_item", grade_medium),
+    ("hard_restock_priority", grade_hard),
+    ("obstacle_course", grade_obstacle_course),
+    ("heavy_lifting", grade_heavy_lifting),
+    ("stamina_run", grade_stamina_run),
+    ("budget_run", grade_budget_run),
+    ("gauntlet", grade_gauntlet),
+]
+@pytest.mark.parametrize("task_id,grader", ALL_GRADERS)
 def test_grade_episode_dispatches_correctly(task_id, grader):
     state = _make_state(task_id, completion_ratio=0.5, action_history=["pick_item"])
     assert grade_episode(state) == grader(state)

tests/test_server.py CHANGED Viewed

@@ -18,7 +18,16 @@ from fastapi.testclient import TestClient
 from grid_env.Server.app import app
-TASK_IDS = ["easy_single_pick", "medium_multi_item", "hard_restock_priority"]
 ALL_ACTIONS = [
     "turn_left",
     "turn_right",
@@ -27,6 +36,7 @@ ALL_ACTIONS = [
     "pick_item",
     "pack_item",
     "recharge",
     "wait",
 ]
@@ -78,7 +88,7 @@ def test_tasks_has_tasks_key(client):
     assert isinstance(body["tasks"], list)
-def test_tasks_returns_all_three(client):
     body = client.get("/tasks").json()
     ids = {t["task_id"] for t in body["tasks"]}
     assert ids == set(TASK_IDS)

 from grid_env.Server.app import app
+TASK_IDS = [  # must match exactly the task ids returned by GET /tasks
+    "easy_single_pick",
+    "medium_multi_item",
+    "hard_restock_priority",
+    "obstacle_course",
+    "heavy_lifting",
+    "stamina_run",
+    "budget_run",
+    "gauntlet",
+]
 ALL_ACTIONS = [
     "turn_left",
     "turn_right",
     "pick_item",
     "pack_item",
     "recharge",
+    "rest",
     "wait",
 ]
     assert isinstance(body["tasks"], list)
+def test_tasks_returns_all(client):
     body = client.get("/tasks").json()
     ids = {t["task_id"] for t in body["tasks"]}
     assert ids == set(TASK_IDS)

tests/test_tasks.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Tests for task definitions: presence of all 3 tasks, structural validity,
 and grader callability.
 """
@@ -9,11 +9,20 @@ from grid_env.graders import grade_episode
 from grid_env.env import WarehouseFulfillmentEnv
-EXPECTED_TASK_IDS = {"easy_single_pick", "medium_multi_item", "hard_restock_priority"}
-def test_exactly_three_tasks_registered():
-    assert len(TASKS) == 3
 def test_all_expected_task_ids_present():
@@ -24,7 +33,7 @@ def test_all_expected_task_ids_present():
 def test_task_has_required_fields(task_id):
     task = get_task(task_id)
     assert task.task_id == task_id
-    assert task.difficulty in {"easy", "medium", "hard"}
     assert task.max_steps > 0
     assert task.battery_capacity > 0
     assert len(task.bins) > 0
@@ -69,6 +78,24 @@ def test_grader_callable_returns_float_in_range(task_id):
     assert 0.0 <= score <= 1.0
 def test_get_task_raises_on_unknown_id():
     with pytest.raises(KeyError, match="Unknown task_id"):
         get_task("does_not_exist")

 """
+Tests for task definitions: presence of all tasks, structural validity,
 and grader callability.
 """
 from grid_env.env import WarehouseFulfillmentEnv
+EXPECTED_TASK_IDS = {  # ids the TASKS registry is expected to hold; also feeds the parametrized tests
+    "easy_single_pick",
+    "medium_multi_item",
+    "hard_restock_priority",
+    "obstacle_course",
+    "heavy_lifting",
+    "stamina_run",
+    "budget_run",
+    "gauntlet",
+}
+def test_expected_task_count():
+    assert len(TASKS) == len(EXPECTED_TASK_IDS)
 def test_all_expected_task_ids_present():
 def test_task_has_required_fields(task_id):
     task = get_task(task_id)
     assert task.task_id == task_id
+    assert task.difficulty in {"easy", "medium", "hard", "expert"}
     assert task.max_steps > 0
     assert task.battery_capacity > 0
     assert len(task.bins) > 0
     assert 0.0 <= score <= 1.0
+@pytest.mark.parametrize("task_id", list(EXPECTED_TASK_IDS))
+def test_obstacles_do_not_overlap_bins_or_stations(task_id):
+    """Obstacles must not overlap with bins, stations, or agent start."""
+    task = get_task(task_id)
+    obstacle_set = set(task.obstacles)
+    bin_positions = {tuple(b.position) for b in task.bins}
+    fixed_positions = {
+        tuple(task.pack_station_position),
+        tuple(task.charger_position),
+        tuple(task.dock_position),
+        tuple(task.agent_start),
+    }
+    if task.rest_position:
+        fixed_positions.add(tuple(task.rest_position))
+    assert obstacle_set.isdisjoint(bin_positions), f"Obstacle overlaps bin in {task_id}"
+    assert obstacle_set.isdisjoint(fixed_positions), f"Obstacle overlaps station in {task_id}"
 def test_get_task_raises_on_unknown_id():
     with pytest.raises(KeyError, match="Unknown task_id"):
         get_task("does_not_exist")