OPENSPEC_Hackhathon / grader.py
SushCodex's picture
Upload 14 files
85768b6 verified
from env import WarehouseEnv
import numpy as np
def grade_agent(task_id, actions):
    """Grade an agent's sequence of actions against a specific warehouse task.

    The task configuration is fetched with ``tasks.get_task(task_id)``, a fresh
    ``WarehouseEnv`` is reset with that task's ``level`` and ``targets``, and
    the actions are replayed in order until they run out or the environment
    reports ``terminated`` or ``truncated``.

    Scoring:
        * Success (``info["is_success"]`` is truthy): 50 base points plus up
          to 50 efficiency points, proportional to the share of the task's
          ``max_steps`` that was left unused.
        * Otherwise: 10 points per item reported in ``info["items_collected"]``.
        * The score is clamped to ``[0, 100]`` and rounded to 2 decimals.

    Args:
        task_id: Identifier handed to ``get_task``.
        actions: Iterable of actions; each one is passed to ``env.step``.

    Returns:
        A dict with the keys ``is_success``, ``final_score``,
        ``total_reward``, ``steps_taken``, ``items_collected``,
        ``target_count`` and ``status``. ``status`` is one of
        ``"Completed"``, ``"Failed (Timeout)"``, ``"Failed (Incomplete)"``
        (the actions ran out before the episode ended) or
        ``"Failed (Collision/Error)"``.
    """
    # Imported at call time, exactly as the original code did.
    from tasks import get_task

    task_config = get_task(task_id)
    max_steps = task_config["max_steps"]

    env = WarehouseEnv()
    _obs, info = env.reset(options={
        "level": task_config["level"],
        "targets": task_config["targets"],
    })

    total_reward = 0
    steps = 0
    terminated = False
    truncated = False
    for action in actions:
        _obs, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        steps += 1
        if terminated or truncated:
            # The episode is over; any remaining actions are ignored.
            break

    # Evaluation criteria. With an empty action list ``info`` is still the
    # dict returned by ``env.reset``.
    is_success = info.get("is_success", False)
    items_collected = info.get("items_collected", 0)

    # Grading algorithm.
    if is_success:
        # Base completion score: 50. Efficiency bonus: up to 50.
        # Guard against a non-positive step budget so a misconfigured task
        # cannot raise ZeroDivisionError.
        if max_steps > 0:
            efficiency = max(0, (max_steps - steps) / max_steps)
        else:
            efficiency = 0
        score = 50 + (50 * efficiency)
    else:
        # Partial credit: 10 points per item collected.
        # NOTE(review): this is not capped below the 50-point success base,
        # so a failed run with 5+ items can match or outscore a slow
        # success -- confirm this is intended.
        score = items_collected * 10

    # Ensure no unfair score.
    score = max(0, min(100, score))

    # Classify the outcome from the environment's own end-of-episode flags
    # instead of the step count alone, so that a run whose action list simply
    # ended early is not mislabelled as a collision.
    if is_success:
        status = "Completed"
    elif truncated or steps >= max_steps:
        status = "Failed (Timeout)"
    elif not terminated:
        status = "Failed (Incomplete)"
    else:
        status = "Failed (Collision/Error)"

    return {
        "is_success": is_success,
        "final_score": round(score, 2),
        "total_reward": total_reward,
        "steps_taken": steps,
        "items_collected": items_collected,
        "target_count": len(task_config["targets"]),
        "status": status,
    }
if __name__ == "__main__":
    # Manual smoke test of the grader on Level 1.
    # Intended route: [0,0] -> [5,5] -> [0,0], using a simple hand-written path.
    first_leg = [3] * 5 + [0] * 5 + [4]
    second_leg = [2] * 5 + [1] * 5 + [5]
    report = grade_agent(1, first_leg + second_leg)
    print("--- Grading Test (Level 1) ---")
    print(report)