Spaces:
Sleeping
Sleeping
| from env import WarehouseEnv | |
| import numpy as np | |
def grade_agent(task_id, actions):
    """
    Grade an agent's sequence of actions against a specific warehouse task.

    The task configuration is fetched with ``tasks.get_task(task_id)``. A
    fresh ``WarehouseEnv`` is reset with the task's ``level`` and ``targets``
    and the actions are replayed in order until they run out or the
    environment reports ``terminated`` or ``truncated``.

    Scoring (always clamped to the range 0-100):
      * Success (``info["is_success"]`` is truthy): 50 base points plus up to
        50 efficiency points, proportional to the unused share of the task's
        ``max_steps`` budget.
      * Otherwise: 10 points per item reported in ``info["items_collected"]``.

    Args:
        task_id: Identifier handed to ``get_task``.
        actions: Iterable of actions; each one is passed unchanged to
            ``env.step``.

    Returns:
        dict with the keys ``is_success``, ``final_score`` (rounded to two
        decimals), ``total_reward``, ``steps_taken``, ``items_collected``,
        ``target_count`` and ``status``.
    """
    # Imported lazily, as in the original code, so importing this module does
    # not require ``tasks`` to be importable yet.
    from tasks import get_task

    task_config = get_task(task_id)
    max_steps = task_config["max_steps"]

    env = WarehouseEnv()
    try:
        _, info = env.reset(options={
            "level": task_config["level"],
            "targets": task_config["targets"],
        })

        total_reward = 0
        steps = 0
        for action in actions:
            _, reward, terminated, truncated, info = env.step(action)
            total_reward += reward
            steps += 1
            # Any actions supplied after the episode has ended are ignored.
            if terminated or truncated:
                break
    finally:
        # Release the environment even when reset()/step() raises. The lookup
        # is guarded because WarehouseEnv's definition is not visible here.
        close_env = getattr(env, "close", None)
        if callable(close_env):
            close_env()

    # Evaluation criteria: read from the last info dict the env produced
    # (the one from reset() when no action was executed).
    is_success = info.get("is_success", False)
    items_collected = info.get("items_collected", 0)

    # Grading algorithm
    if is_success:
        # Base completion score: 50
        # Efficiency bonus: up to 50
        if max_steps > 0:
            efficiency = max(0, (max_steps - steps) / max_steps)
        else:
            # A non-positive step budget leaves nothing to measure against;
            # award no bonus instead of dividing by zero.
            efficiency = 0
        score = 50 + (50 * efficiency)
    else:
        # Partial credit: 10 points per item collected
        # NOTE(review): five or more items on a failed run reaches the
        # 50-point success baseline — confirm this is intended.
        score = items_collected * 10

    # Ensure the score stays within 0-100.
    score = max(0, min(100, score))

    if is_success:
        status = "Completed"
    elif steps >= max_steps:
        status = "Failed (Timeout)"
    else:
        # NOTE(review): this branch is also reached when ``actions`` simply
        # ran out before the episode ended — confirm the label is acceptable.
        status = "Failed (Collision/Error)"

    return {
        "is_success": is_success,
        "final_score": round(score, 2),
        "total_reward": total_reward,
        "steps_taken": steps,
        "items_collected": items_collected,
        "target_count": len(task_config["targets"]),
        "status": status,
    }
if __name__ == "__main__":
    # Test Level 1: Navigate [0,0] -> [5,5] -> [0,0]
    # Hand-written action sequence used only to smoke-test the grader.
    # NOTE(review): the meaning of each action code is defined by
    # WarehouseEnv and is not visible here; the split into two legs below
    # only mirrors the route described above — confirm against the env.
    outbound_leg = [3] * 5 + [0] * 5 + [4]
    return_leg = [2] * 5 + [1] * 5 + [5]

    report = grade_agent(1, outbound_leg + return_leg)

    print("--- Grading Test (Level 1) ---")
    print(report)