from env import WarehouseEnv
import numpy as np

def grade_agent(task_id, actions) -> dict:
    """
    Grades an agent's sequence of actions against a specific warehouse task.

    The task configuration is fetched with ``tasks.get_task(task_id)`` and is
    read for the keys ``"level"``, ``"targets"`` and ``"max_steps"``. A new
    ``WarehouseEnv`` is reset with that level and those targets, then the
    actions are replayed one by one until either the actions run out or the
    environment reports ``terminated`` or ``truncated``.

    Scoring (always clamped to the range 0..100):
      * Success (``info["is_success"]`` is truthy): 50 base points plus up to
        50 efficiency points, where efficiency is the fraction of
        ``max_steps`` that was left unused.
      * Failure: 10 points per item reported in ``info["items_collected"]``.

    Args:
        task_id: Identifier passed straight through to ``get_task``.
        actions: Iterable of actions; each one is handed to ``env.step``.

    Returns:
        A dict with the keys ``is_success``, ``final_score`` (rounded to two
        decimals), ``total_reward``, ``steps_taken``, ``items_collected``,
        ``target_count`` and ``status``.
    """
    # Imported here, as in the original; presumably this avoids an import
    # cycle with the tasks module -- TODO confirm.
    from tasks import get_task
    task_config = get_task(task_id)
    max_steps = task_config["max_steps"]

    env = WarehouseEnv()
    total_reward = 0
    steps = 0
    try:
        _, info = env.reset(options={
            "level": task_config["level"],
            "targets": task_config["targets"]
        })

        for action in actions:
            _, reward, terminated, truncated, info = env.step(action)
            total_reward += reward
            steps += 1
            if terminated or truncated:
                # The episode is over; any remaining actions are ignored.
                break
    finally:
        # Release the environment even if reset/step raised. close() is only
        # called when the environment actually provides it.
        close_env = getattr(env, "close", None)
        if callable(close_env):
            close_env()

    # Evaluation Criteria: with an empty action list, `info` is the one
    # returned by reset().
    is_success = info.get("is_success", False)
    items_collected = info.get("items_collected", 0)

    # Grading Algorithm
    if is_success:
        # Base completion score: 50
        # Efficiency bonus: up to 50
        if max_steps > 0:
            efficiency = max(0, (max_steps - steps) / max_steps)
        else:
            # A non-positive step budget would divide by zero (or produce a
            # meaningless ratio), so no efficiency bonus is awarded.
            efficiency = 0
        score = 50 + (50 * efficiency)
    else:
        # Partial credit: 10 points per item collected
        # NOTE(review): with 5 or more items a failed run can match or exceed
        # the 50-point base of a successful one -- confirm this is intended.
        score = items_collected * 10

    # Ensure no unfair score
    score = max(0, min(100, score))

    if is_success:
        status = "Completed"
    elif steps >= max_steps:
        status = "Failed (Timeout)"
    else:
        # NOTE(review): a run that stopped only because the action list was
        # exhausted also lands here.
        status = "Failed (Collision/Error)"

    return {
        "is_success": is_success,
        "final_score": round(score, 2),
        "total_reward": total_reward,
        "steps_taken": steps,
        "items_collected": items_collected,
        "target_count": len(task_config["targets"]),
        "status": status
    }

if __name__ == "__main__":
    # Smoke test for the grader on task 1 with a hand-written action list.
    # Per the original note the intended route is [0,0] -> [5,5] -> [0,0];
    # what each action code means is defined by WarehouseEnv, not here.
    first_leg = [3] * 5 + [0] * 5
    second_leg = [2] * 5 + [1] * 5
    manual_path = first_leg + [4] + second_leg + [5]

    report = grade_agent(1, manual_path)
    print("--- Grading Test (Level 1) ---")
    print(report)