"""Test the grading function directly to understand the 0.0 score issue.

Runs one "easy" episode against the Docker-hosted environment with a naive
skill-matching policy, then prints every score component next to the value
returned by ``compute_final_score`` and the score reported by the environment.
"""

import asyncio
import traceback

from client import AdaptiveProjectManagerClient
from graders.base_grader import (
    compute_final_score,
    _compute_completion_score,
    _compute_deadline_score,
    _compute_budget_score,
    _compute_team_health_score,
)
from models import ProjectAction, Assignment

DOCKER_IMAGE = "adaptive-project-manager:latest"
TASK_ID = "easy"
MAX_STEPS = 20
# Only the first few available employees are considered on each step.
MAX_ASSIGNEES_PER_STEP = 3
# Tolerance used when comparing the environment score with the computed one.
SCORE_TOLERANCE = 0.0001


def _print_banner(title, *, leading_newline=False):
    """Print *title* framed by two 80-character rules."""
    rule = "=" * 80
    print(("\n" if leading_newline else "") + rule)
    print(title)
    print(rule)


def _plan_assignments(obs):
    """Return a list of ``Assignment`` objects for one step.

    Simple strategy: each of the first ``MAX_ASSIGNEES_PER_STEP`` available
    employees takes the first open ("todo" or "in_progress") task whose
    required skill they have and that nobody else was given in this step.
    """
    open_tasks = [t for t in obs.tasks if t.status in ("todo", "in_progress")]
    free_employees = [e for e in obs.employees if e.available]

    assignments = []
    taken_task_ids = set()  # O(1) "already assigned this step" check
    for emp in free_employees[:MAX_ASSIGNEES_PER_STEP]:
        for task in open_tasks:
            if task.id in taken_task_ids:
                continue
            if task.required_skill in emp.skills:
                assignments.append(Assignment(employee_id=emp.id, task_id=task.id))
                taken_task_ids.add(task.id)
                break
    return assignments


async def _run_episode(env):
    """Reset *env* and step it until done or ``MAX_STEPS`` is reached.

    Returns the last result object produced by ``reset``/``step``.
    """
    result = await env.reset(task_id=TASK_ID)
    for _ in range(MAX_STEPS):
        if result.done:
            break
        action = ProjectAction(
            assignments=_plan_assignments(result.observation),
            contingency_action="none",
        )
        result = await env.step(action)
    return result


def _print_project_state(project_state):
    """Print the end-of-episode project summary, tasks and employees."""
    print("\nProject State at End:")
    print(f" Day: {project_state.day}")
    print(f" Total days: {project_state.total_days}")
    print(f" Budget total: ${project_state.budget_total:,.0f}")
    print(f" Budget spent: ${project_state.budget_spent:,.0f}")
    print(f" Stakeholder satisfaction: {project_state.stakeholder_satisfaction:.2f}")

    print("\nTasks:")
    for task in project_state.tasks:
        print(f" {task.id}: {task.status}, priority={task.priority}, critical={task.is_critical_path}")

    print("\nEmployees:")
    for emp in project_state.employees:
        print(f" {emp.id}: burnout={emp.burnout:.2f}")


def _print_score_breakdown(project_state):
    """Print each score component and the final score; return the final score.

    The weights shown are the ones hard-coded in this script; they presumably
    mirror the grader's own weights (confirm against ``compute_final_score``).
    The "Manual calculation" line is printed so the two can be compared.
    """
    _print_banner("SCORE COMPONENTS", leading_newline=True)

    completion_score = _compute_completion_score(project_state)
    print(f"\n1. Completion Score: {completion_score:.4f}")
    print(" (35% weight)")
    completed = sum(1 for t in project_state.tasks if t.status == "done")
    print(f" Completed: {completed}/{len(project_state.tasks)}")

    deadline_score = _compute_deadline_score(project_state)
    print(f"\n2. Deadline Score: {deadline_score:.4f}")
    print(" (25% weight)")
    days_remaining = project_state.total_days - project_state.day
    print(f" Days remaining: {days_remaining}")
    critical_tasks = [t for t in project_state.tasks if t.is_critical_path]
    critical_done = all(t.status == "done" for t in critical_tasks)
    print(f" All critical tasks done: {critical_done}")
    print(f" Critical tasks: {[t.id for t in critical_tasks]}")
    print(f" Critical task statuses: {[(t.id, t.status) for t in critical_tasks]}")

    budget_score = _compute_budget_score(project_state)
    print(f"\n3. Budget Score: {budget_score:.4f}")
    print(" (15% weight)")
    print(f" Budget remaining: ${project_state.budget_total - project_state.budget_spent:,.0f}")

    team_health_score = _compute_team_health_score(project_state)
    print(f"\n4. Team Health Score: {team_health_score:.4f}")
    print(" (15% weight)")
    employees = project_state.employees
    # Guard the average: an empty roster would otherwise raise ZeroDivisionError.
    avg_burnout = sum(e.burnout for e in employees) / len(employees) if employees else 0.0
    print(f" Average burnout: {avg_burnout:.2f}")

    stakeholder_score = project_state.stakeholder_satisfaction
    print(f"\n5. Stakeholder Score: {stakeholder_score:.4f}")
    print(" (10% weight)")

    _print_banner("FINAL SCORE CALCULATION", leading_newline=True)

    final_score = compute_final_score(project_state)
    print("\nFinal Score = (")
    print(f" 0.35 * {completion_score:.4f}")
    print(f" + 0.25 * {deadline_score:.4f}")
    print(f" + 0.15 * {budget_score:.4f}")
    print(f" + 0.15 * {team_health_score:.4f}")
    print(f" + 0.10 * {stakeholder_score:.4f}")
    print(f") = {final_score:.4f}")

    manual_calc = (
        0.35 * completion_score
        + 0.25 * deadline_score
        + 0.15 * budget_score
        + 0.15 * team_health_score
        + 0.10 * stakeholder_score
    )
    print(f"\nManual calculation: {manual_calc:.4f}")
    return final_score


def _compare_with_environment(result, final_score):
    """Print whether the environment's reported score matches *final_score*.

    The environment score is read from ``result.observation.metadata`` under
    the ``"final_score"`` key (defaulting to 0.0) and only when the episode
    is done.
    """
    if not result.done:
        # Without a finished episode there is nothing to compare against.
        print(f"\nEpisode not done after {MAX_STEPS} steps; no environment score to compare.")
        return

    env_score = result.observation.metadata.get("final_score", 0.0)
    print(f"Environment returned score: {env_score:.4f}")
    if abs(env_score - final_score) > SCORE_TOLERANCE:
        print(f"\n⚠️ MISMATCH! Environment score ({env_score:.4f}) != Computed score ({final_score:.4f})")
    else:
        print("\n✅ Scores match!")


async def test_grading():
    """Test grading on a completed project.

    Starts the environment from the Docker image, plays one episode, prints
    the project state and score breakdown, and compares the computed final
    score with the one the environment returned. Errors raised after the
    environment is created are printed with a traceback rather than
    propagated.
    """
    _print_banner("GRADING TEST")

    env = await AdaptiveProjectManagerClient.from_docker_image(DOCKER_IMAGE)
    try:
        try:
            result = await _run_episode(env)

            # Get the project state
            project_state = env.get_project_state()
            _print_project_state(project_state)

            # Compute score components manually
            final_score = _print_score_breakdown(project_state)

            # Now check what the environment returned
            _compare_with_environment(result, final_score)
        finally:
            # Always release the environment, even when the run above failed.
            await env.close()
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on.
        print(f"\nError: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    asyncio.run(test_grading())