Spaces:

SushCodex
/

META-Hack

Sleeping

App Files Files Community

META-Hack / grader.py

SushCodex

Upload 15 files

a7f095f verified about 2 months ago

raw

history blame contribute delete

1.91 kB

	from env import WarehouseEnv
	import numpy as np

	def grade_agent(task_id, actions):
	    """Replay ``actions`` in a fresh ``WarehouseEnv`` and score the episode.

	    The task configuration is looked up with ``tasks.get_task(task_id)`` and
	    must provide the keys ``"level"``, ``"targets"`` and ``"max_steps"``.

	    Scoring:
	        * Success (``info["is_success"]`` is truthy): 50 base points plus up
	          to 50 efficiency points, proportional to the fraction of the
	          ``max_steps`` budget left unused.
	        * Failure: 10 points per ``info["items_collected"]``.
	        * The score is clamped to ``[0, 100]`` and rounded to 2 decimals.

	    Args:
	        task_id: Identifier passed to ``get_task``.
	        actions: Iterable of actions, fed one at a time to ``env.step``.
	            Replay stops as soon as a step reports ``terminated`` or
	            ``truncated``; remaining actions are ignored.

	    Returns:
	        dict with keys ``is_success``, ``final_score``, ``total_reward``,
	        ``steps_taken``, ``items_collected``, ``target_count`` and ``status``
	        (one of ``"Completed"``, ``"Failed (Timeout)"``,
	        ``"Failed (Collision/Error)"``).

	    Raises:
	        KeyError: If the task configuration lacks a required key.
	    """
	    # Function-local import kept from the original.
	    # NOTE(review): presumably avoids an import cycle - confirm.
	    from tasks import get_task

	    task_config = get_task(task_id)

	    env = WarehouseEnv()
	    try:
	        _, info = env.reset(options={
	            "level": task_config["level"],
	            "targets": task_config["targets"],
	        })

	        total_reward = 0
	        steps = 0
	        for action in actions:
	            _, reward, terminated, truncated, info = env.step(action)
	            total_reward += reward
	            steps += 1
	            if terminated or truncated:
	                break
	    finally:
	        # Release the environment even if reset/step raised.
	        # NOTE(review): reset/step look like the Gymnasium Env API, so
	        # close() presumably exists; guarded in case WarehouseEnv lacks it.
	        close = getattr(env, "close", None)
	        if callable(close):
	            close()

	    # Evaluation criteria: both values come from the last ``info`` seen
	    # (the one from reset() when ``actions`` is empty).
	    is_success = info.get("is_success", False)
	    items_collected = info.get("items_collected", 0)
	    max_steps = task_config["max_steps"]

	    # Grading algorithm
	    if is_success:
	        # Base completion score: 50; efficiency bonus: up to 50.
	        # A non-positive step budget earns no bonus instead of dividing by
	        # zero (or producing an efficiency above 1 for a negative budget).
	        if max_steps > 0:
	            efficiency = max(0, (max_steps - steps) / max_steps)
	        else:
	            efficiency = 0
	        score = 50 + (50 * efficiency)
	    else:
	        # Partial credit: 10 points per item collected.
	        score = items_collected * 10

	    # Ensure no unfair score: clamp to [0, 100].
	    score = max(0, min(100, score))

	    if is_success:
	        status = "Completed"
	    elif steps >= max_steps:
	        status = "Failed (Timeout)"
	    else:
	        # NOTE(review): this label is also used when ``actions`` ran out
	        # before the episode ended - confirm that is intended.
	        status = "Failed (Collision/Error)"

	    return {
	        "is_success": is_success,
	        "final_score": round(score, 2),
	        "total_reward": total_reward,
	        "steps_taken": steps,
	        "items_collected": items_collected,
	        "target_count": len(task_config["targets"]),
	        "status": status,
	    }

	if __name__ == "__main__":
	    # Test Level 1: Navigate [0,0] -> [5,5] -> [0,0]
	    # Simple manual path for testing the grader. Each ``[a] * 5`` repeats
	    # action ``a`` five times; the ``*`` operators were missing, which made
	    # this line a syntax error.
	    # NOTE(review): action codes are defined by WarehouseEnv - confirm the
	    # meaning of 0-5 there.
	    test_actions = [3] * 5 + [0] * 5 + [4] + [2] * 5 + [1] * 5 + [5]
	    result = grade_agent(1, test_actions)
	    print("--- Grading Test (Level 1) ---")
	    print(result)