File size: 6,114 Bytes
"""
Test the grading function directly to understand the 0.0 score issue.
"""
import asyncio
from client import AdaptiveProjectManagerClient
from graders.base_grader import (
compute_final_score,
_compute_completion_score,
_compute_deadline_score,
_compute_budget_score,
_compute_team_health_score,
)
from models import ProjectAction, Assignment
async def test_grading():
    """Run the "easy" task to completion and print a breakdown of its grade.

    Starts the environment from the ``adaptive-project-manager:latest`` Docker
    image, plays at most 20 steps with a greedy skill-matching policy, then
    prints:

    * the end-of-run project state (day, budget, stakeholder satisfaction,
      per-task status, per-employee burnout),
    * each grader component next to the weight shown in the report,
    * the value of ``compute_final_score`` and a manual weighted sum of the
      same components,
    * if the episode finished, the ``final_score`` found in the observation
      metadata and whether it matches the computed score.

    Exceptions raised after the environment has been created are printed
    together with their traceback instead of propagating. The environment is
    closed whether or not the run succeeds.
    """
    print("=" * 80)
    print("GRADING TEST")
    print("=" * 80)

    env = await AdaptiveProjectManagerClient.from_docker_image("adaptive-project-manager:latest")

    try:
        try:
            result = await env.reset(task_id="easy")

            # Run to completion (bounded so a stuck episode cannot loop forever).
            max_steps = 20
            for _ in range(max_steps):
                if result.done:
                    break

                obs = result.observation

                # Simple strategy: assign available employees to available tasks.
                assignments = []
                available_tasks = [t for t in obs.tasks if t.status in ["todo", "in_progress"]]
                available_employees = [e for e in obs.employees if e.available]

                # Match employees to tasks by skill. Only the first three
                # available employees are considered; each gets at most one
                # task and each task at most one employee per step.
                for emp in available_employees[:3]:
                    for task in available_tasks:
                        if task.required_skill in emp.skills and not any(a.task_id == task.id for a in assignments):
                            assignments.append(Assignment(employee_id=emp.id, task_id=task.id))
                            break

                action = ProjectAction(assignments=assignments, contingency_action="none")
                result = await env.step(action)

            # Get the project state
            project_state = env.get_project_state()

            print("\nProject State at End:")
            print(f" Day: {project_state.day}")
            print(f" Total days: {project_state.total_days}")
            print(f" Budget total: ${project_state.budget_total:,.0f}")
            print(f" Budget spent: ${project_state.budget_spent:,.0f}")
            print(f" Stakeholder satisfaction: {project_state.stakeholder_satisfaction:.2f}")

            print("\nTasks:")
            for task in project_state.tasks:
                print(f" {task.id}: {task.status}, priority={task.priority}, critical={task.is_critical_path}")

            print("\nEmployees:")
            for emp in project_state.employees:
                print(f" {emp.id}: burnout={emp.burnout:.2f}")

            # Compute score components manually
            print("\n" + "=" * 80)
            print("SCORE COMPONENTS")
            print("=" * 80)

            completion_score = _compute_completion_score(project_state)
            print(f"\n1. Completion Score: {completion_score:.4f}")
            print(" (35% weight)")
            completed = sum(1 for t in project_state.tasks if t.status == "done")
            print(f" Completed: {completed}/{len(project_state.tasks)}")

            deadline_score = _compute_deadline_score(project_state)
            print(f"\n2. Deadline Score: {deadline_score:.4f}")
            print(" (25% weight)")
            days_remaining = project_state.total_days - project_state.day
            print(f" Days remaining: {days_remaining}")
            critical_tasks = [t for t in project_state.tasks if t.is_critical_path]
            critical_done = all(t.status == "done" for t in critical_tasks)
            print(f" All critical tasks done: {critical_done}")
            print(f" Critical tasks: {[t.id for t in critical_tasks]}")
            print(f" Critical task statuses: {[(t.id, t.status) for t in critical_tasks]}")

            budget_score = _compute_budget_score(project_state)
            print(f"\n3. Budget Score: {budget_score:.4f}")
            print(" (15% weight)")
            print(f" Budget remaining: ${project_state.budget_total - project_state.budget_spent:,.0f}")

            team_health_score = _compute_team_health_score(project_state)
            print(f"\n4. Team Health Score: {team_health_score:.4f}")
            print(" (15% weight)")
            employees = project_state.employees
            # With no employees, report 0.0 rather than dividing by zero and
            # aborting the rest of the report.
            avg_burnout = sum(e.burnout for e in employees) / len(employees) if employees else 0.0
            print(f" Average burnout: {avg_burnout:.2f}")

            stakeholder_score = project_state.stakeholder_satisfaction
            print(f"\n5. Stakeholder Score: {stakeholder_score:.4f}")
            print(" (10% weight)")

            # Compute final score
            print("\n" + "=" * 80)
            print("FINAL SCORE CALCULATION")
            print("=" * 80)

            final_score = compute_final_score(project_state)
            print("\nFinal Score = (")
            print(f" 0.35 * {completion_score:.4f}")
            print(f" + 0.25 * {deadline_score:.4f}")
            print(f" + 0.15 * {budget_score:.4f}")
            print(f" + 0.15 * {team_health_score:.4f}")
            print(f" + 0.10 * {stakeholder_score:.4f}")
            print(f") = {final_score:.4f}")

            # Independent weighted sum using the weights printed above, shown
            # alongside the grader's own result for comparison by eye.
            manual_calc = (
                0.35 * completion_score
                + 0.25 * deadline_score
                + 0.15 * budget_score
                + 0.15 * team_health_score
                + 0.10 * stakeholder_score
            )
            print(f"\nManual calculation: {manual_calc:.4f}")

            # Now check what the environment returned
            if result.done:
                obs = result.observation
                env_score = obs.metadata.get("final_score", 0.0)
                print(f"Environment returned score: {env_score:.4f}")

                if abs(env_score - final_score) > 0.0001:
                    print(f"\n⚠️ MISMATCH! Environment score ({env_score:.4f}) != Computed score ({final_score:.4f})")
                else:
                    print("\n✅ Scores match!")
        finally:
            # Always release the environment, even if the run above failed.
            # Kept inside the outer ``try`` so a failure while closing is
            # reported the same way as any other error.
            await env.close()

    except Exception as e:
        print(f"\nError: {e}")
        import traceback
        traceback.print_exc()
# Script entry point: when this file is executed directly (not imported),
# run the async grading diagnostic to completion on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(test_grading())
|