from env import WarehouseEnv
import numpy as np


def grade_agent(task_id, actions) -> dict:
    """Grade an agent's sequence of actions against a specific warehouse task.

    The task configuration is looked up with ``get_task(task_id)``; a fresh
    ``WarehouseEnv`` is reset with that task's ``level`` and ``targets`` and
    the actions are replayed one at a time until they run out or the
    environment reports ``terminated`` or ``truncated``.

    Scoring:
        * Success (``info["is_success"]`` is truthy): 50 base points plus up
          to 50 efficiency points, proportional to the fraction of the
          task's ``max_steps`` budget left unused.
        * Failure: 10 points per collected item
          (``info["items_collected"]``).
        * The result is clamped to the range 0-100.

    Args:
        task_id: Identifier handed to ``tasks.get_task``.
        actions: Iterable of actions, each passed unchanged to ``env.step``.

    Returns:
        A dict with the keys ``is_success``, ``final_score`` (rounded to two
        decimals), ``total_reward``, ``steps_taken``, ``items_collected``,
        ``target_count`` and ``status``.
    """
    # Kept as a function-local import, as in the original; presumably this
    # avoids an import cycle with the tasks module -- TODO confirm.
    from tasks import get_task

    task_config = get_task(task_id)
    max_steps = task_config["max_steps"]

    env = WarehouseEnv()
    _obs, info = env.reset(options={
        "level": task_config["level"],
        "targets": task_config["targets"],
    })

    total_reward = 0
    steps = 0
    for action in actions:
        _obs, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        steps += 1
        if terminated or truncated:
            # Episode is over; any remaining actions are ignored.
            break

    # Evaluation criteria: read once from the info dict of the last step
    # (or of reset() when no action was executed).
    is_success = info.get("is_success", False)
    items_collected = info.get("items_collected", 0)

    if is_success:
        # Base completion score: 50. Efficiency bonus: up to 50.
        # A non-positive step budget cannot yield a meaningful ratio (and a
        # zero budget would divide by zero), so no bonus is awarded then.
        if max_steps > 0:
            efficiency = max(0, (max_steps - steps) / max_steps)
        else:
            efficiency = 0
        score = 50 + (50 * efficiency)
    else:
        # Partial credit: 10 points per item collected.
        score = items_collected * 10

    # Keep the score inside the 0-100 range.
    score = max(0, min(100, score))

    if is_success:
        status = "Completed"
    elif steps >= max_steps:
        status = "Failed (Timeout)"
    else:
        status = "Failed (Collision/Error)"

    return {
        "is_success": is_success,
        "final_score": round(score, 2),
        "total_reward": total_reward,
        "steps_taken": steps,
        "items_collected": items_collected,
        "target_count": len(task_config["targets"]),
        "status": status,
    }


def main():
    """Run a manual smoke test of the grader on level 1."""
    # Test Level 1: Navigate [0,0] -> [5,5] -> [0,0]
    # Simple manual path for testing the grader
    test_actions = [3] * 5 + [0] * 5 + [4] + [2] * 5 + [1] * 5 + [5]
    result = grade_agent(1, test_actions)
    print("--- Grading Test (Level 1) ---")
    print(result)


if __name__ == "__main__":
    main()