|
|
|
|
|
|
|
|
|
|
|
|
| """
|
| Task graders for the Energy & Memory RAM Optimization Environment.
|
|
|
| Each grader function evaluates agent performance on a specific task,
|
| returning a score from 0.0 (worst) to 1.0 (best).
|
| """
|
|
|
| from he_demo.models import EnergyOptimizationObservation
|
|
|
|
|
def grade_basic_ram_reduction(observation: EnergyOptimizationObservation) -> float:
    """Grade the basic RAM reduction task: reduce RAM usage below 70%.

    Each metric is mapped linearly onto [0.0, 1.0]: RAM usage scores 0.0 at
    100 and 1.0 at 70 or lower; energy consumption scores 0.0 at 10.0 and
    1.0 at 7.5 or lower. The two scores are averaged and multiplied by a
    step factor that is 1.0 for up to 10 steps and drops by 0.1 for every
    additional step, bottoming out at 0.0.

    Args:
        observation: Object exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        A score from 0.0 (worst) to 1.0 (best). A NaN metric reading earns
        no credit for that metric.
    """
    ram_fraction = (100.0 - observation.ram_usage) / (100.0 - 70.0)
    energy_fraction = (10.0 - observation.energy_consumption) / (10.0 - 7.5)

    # NaN fails every comparison, so the `> 0.0` guard maps it to 0.0.
    # max(0.0, min(1.0, nan)) would instead evaluate to 1.0 (full credit).
    ram_score = min(1.0, ram_fraction) if ram_fraction > 0.0 else 0.0
    energy_score = min(1.0, energy_fraction) if energy_fraction > 0.0 else 0.0

    # Multiplier, not a subtraction: 1.0 within the budget, 0.0 from 10 steps over.
    if observation.steps_taken <= 10:
        step_factor = 1.0
    else:
        step_factor = max(0.0, 1.0 - (observation.steps_taken - 10) * 0.1)

    return (ram_score + energy_score) / 2.0 * step_factor
|
|
|
|
|
def grade_energy_optimization(observation: EnergyOptimizationObservation) -> float:
    """Grade the energy optimization task: energy below 6 kWh with RAM below 75%.

    Each metric is mapped linearly onto [0.0, 1.0]: RAM usage scores 0.0 at
    100 and 1.0 at 75 or lower; energy consumption scores 0.0 at 10.0 and
    1.0 at 6.0 or lower. The two scores are averaged and multiplied by a
    step factor that is 1.0 for up to 15 steps and drops by 0.1 for every
    additional step, bottoming out at 0.0.

    Args:
        observation: Object exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        A score from 0.0 (worst) to 1.0 (best). A NaN metric reading earns
        no credit for that metric.
    """
    ram_fraction = (100.0 - observation.ram_usage) / (100.0 - 75.0)
    energy_fraction = (10.0 - observation.energy_consumption) / (10.0 - 6.0)

    # NaN fails every comparison, so the `> 0.0` guard maps it to 0.0.
    # max(0.0, min(1.0, nan)) would instead evaluate to 1.0 (full credit).
    ram_score = min(1.0, ram_fraction) if ram_fraction > 0.0 else 0.0
    energy_score = min(1.0, energy_fraction) if energy_fraction > 0.0 else 0.0

    # Multiplier, not a subtraction: 1.0 within the budget, 0.0 from 10 steps over.
    if observation.steps_taken <= 15:
        step_factor = 1.0
    else:
        step_factor = max(0.0, 1.0 - (observation.steps_taken - 15) * 0.1)

    return (ram_score + energy_score) / 2.0 * step_factor
|
|
|
|
|
def grade_balanced_optimization(observation: EnergyOptimizationObservation) -> float:
    """Grade the balanced optimization task: RAM below 60% and energy below 5 kWh.

    Each metric is mapped linearly onto [0.0, 1.0]: RAM usage scores 0.0 at
    100 and 1.0 at 60 or lower; energy consumption scores 0.0 at 10.0 and
    1.0 at 5.0 or lower. The two scores are averaged and multiplied by a
    step factor that is 1.0 for up to 20 steps and drops by 0.1 for every
    additional step, bottoming out at 0.0.

    Args:
        observation: Object exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        A score from 0.0 (worst) to 1.0 (best). A NaN metric reading earns
        no credit for that metric.
    """
    ram_fraction = (100.0 - observation.ram_usage) / (100.0 - 60.0)
    energy_fraction = (10.0 - observation.energy_consumption) / (10.0 - 5.0)

    # NaN fails every comparison, so the `> 0.0` guard maps it to 0.0.
    # max(0.0, min(1.0, nan)) would instead evaluate to 1.0 (full credit).
    ram_score = min(1.0, ram_fraction) if ram_fraction > 0.0 else 0.0
    energy_score = min(1.0, energy_fraction) if energy_fraction > 0.0 else 0.0

    # Multiplier, not a subtraction: 1.0 within the budget, 0.0 from 10 steps over.
    if observation.steps_taken <= 20:
        step_factor = 1.0
    else:
        step_factor = max(0.0, 1.0 - (observation.steps_taken - 20) * 0.1)

    return (ram_score + energy_score) / 2.0 * step_factor
|
|
|
|
|
def grade_advanced_efficiency(observation: EnergyOptimizationObservation) -> float:
    """Grade the advanced efficiency task: RAM below 50% and energy below 4 kWh.

    Each metric is mapped linearly onto [0.0, 1.0]: RAM usage scores 0.0 at
    100 and 1.0 at 50 or lower; energy consumption scores 0.0 at 10.0 and
    1.0 at 4.0 or lower. The two scores are averaged and multiplied by a
    step factor that is 1.0 for up to 25 steps and drops by 0.1 for every
    additional step, bottoming out at 0.0.

    Args:
        observation: Object exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        A score from 0.0 (worst) to 1.0 (best). A NaN metric reading earns
        no credit for that metric.
    """
    ram_fraction = (100.0 - observation.ram_usage) / (100.0 - 50.0)
    energy_fraction = (10.0 - observation.energy_consumption) / (10.0 - 4.0)

    # NaN fails every comparison, so the `> 0.0` guard maps it to 0.0.
    # max(0.0, min(1.0, nan)) would instead evaluate to 1.0 (full credit).
    ram_score = min(1.0, ram_fraction) if ram_fraction > 0.0 else 0.0
    energy_score = min(1.0, energy_fraction) if energy_fraction > 0.0 else 0.0

    # Multiplier, not a subtraction: 1.0 within the budget, 0.0 from 10 steps over.
    if observation.steps_taken <= 25:
        step_factor = 1.0
    else:
        step_factor = max(0.0, 1.0 - (observation.steps_taken - 25) * 0.1)

    return (ram_score + energy_score) / 2.0 * step_factor
|
|
|
|
|
def grade_expert_optimization(observation: EnergyOptimizationObservation) -> float:
    """Grade the expert (master level) task: RAM below 40% and energy below 3 kWh.

    Each metric is mapped linearly onto [0.0, 1.0]: RAM usage scores 0.0 at
    100 and 1.0 at 40 or lower; energy consumption scores 0.0 at 10.0 and
    1.0 at 3.0 or lower. The two scores are averaged and multiplied by a
    step factor that is 1.0 for up to 30 steps and drops by 0.1 for every
    additional step, bottoming out at 0.0.

    Args:
        observation: Object exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        A score from 0.0 (worst) to 1.0 (best). A NaN metric reading earns
        no credit for that metric.
    """
    ram_fraction = (100.0 - observation.ram_usage) / (100.0 - 40.0)
    energy_fraction = (10.0 - observation.energy_consumption) / (10.0 - 3.0)

    # NaN fails every comparison, so the `> 0.0` guard maps it to 0.0.
    # max(0.0, min(1.0, nan)) would instead evaluate to 1.0 (full credit).
    ram_score = min(1.0, ram_fraction) if ram_fraction > 0.0 else 0.0
    energy_score = min(1.0, energy_fraction) if energy_fraction > 0.0 else 0.0

    # Multiplier, not a subtraction: 1.0 within the budget, 0.0 from 10 steps over.
    if observation.steps_taken <= 30:
        step_factor = 1.0
    else:
        step_factor = max(0.0, 1.0 - (observation.steps_taken - 30) * 0.1)

    return (ram_score + energy_score) / 2.0 * step_factor