| from env import WarehouseEnv |
| import numpy as np |
|
|
def grade_agent(task_id, actions):
    """
    Grades an agent's sequence of actions against a specific warehouse task.

    The task configuration is looked up with ``tasks.get_task(task_id)``; its
    ``"level"``, ``"targets"`` and ``"max_steps"`` entries are read here. A
    fresh ``WarehouseEnv`` is reset with that level and those targets, and the
    actions are replayed one by one until the environment reports
    ``terminated`` or ``truncated``, or until the actions run out.

    Scoring (clamped to the range 0..100):
      * success: 50 points plus up to 50 more for unused step budget, i.e.
        ``50 + 50 * (max_steps - steps) / max_steps``;
      * failure: 10 points per item reported in ``info["items_collected"]``.

    Args:
        task_id: Identifier passed through to ``get_task``.
        actions: Iterable of actions; each one is passed unchanged to
            ``env.step``.

    Returns:
        dict with the keys ``is_success``, ``final_score``, ``total_reward``,
        ``steps_taken``, ``items_collected``, ``target_count`` and ``status``.

    Raises:
        KeyError: If the task configuration lacks ``"level"``, ``"targets"``
            or ``"max_steps"``.
    """
    from tasks import get_task

    task_config = get_task(task_id)
    max_steps = task_config["max_steps"]

    env = WarehouseEnv()
    try:
        _, info = env.reset(options={
            "level": task_config["level"],
            "targets": task_config["targets"],
        })

        total_reward = 0
        steps = 0
        for action in actions:
            _, reward, terminated, truncated, info = env.step(action)
            total_reward += reward
            steps += 1
            # Stop replaying as soon as the episode is over; any remaining
            # actions are ignored.
            if terminated or truncated:
                break
    finally:
        # Release whatever the environment holds, even if reset/step raised.
        # The lookup is defensive because WarehouseEnv is defined elsewhere
        # and may not expose close().
        close = getattr(env, "close", None)
        if callable(close):
            close()

    # `info` is the dict from the last step, or from reset() when no action
    # was executed.
    is_success = info.get("is_success", False)
    items_collected = info.get("items_collected", 0)

    if is_success:
        # A non-positive step budget leaves nothing to be efficient against;
        # treating it as zero efficiency also avoids dividing by zero.
        if max_steps > 0:
            efficiency = max(0, (max_steps - steps) / max_steps)
        else:
            efficiency = 0
        score = 50 + (50 * efficiency)
    else:
        # Partial credit for a failed run.
        score = items_collected * 10

    score = max(0, min(100, score))

    if is_success:
        status = "Completed"
    elif steps >= max_steps:
        status = "Failed (Timeout)"
    else:
        # NOTE(review): this label also covers runs where the action list
        # simply ended (or was empty) before the episode finished.
        status = "Failed (Collision/Error)"

    return {
        "is_success": is_success,
        "final_score": round(score, 2),
        "total_reward": total_reward,
        "steps_taken": steps,
        "items_collected": items_collected,
        "target_count": len(task_config["targets"]),
        "status": status,
    }
|
|
if __name__ == "__main__":
    # Smoke test: replay a fixed action script against task 1 and print the
    # resulting grade report.
    demo_moves = [3] * 5 + [0] * 5 + [4]
    demo_moves += [2] * 5 + [1] * 5 + [5]
    report = grade_agent(1, demo_moves)
    print("--- Grading Test (Level 1) ---")
    print(report)
|
|