energy-optimization-ppo / task_graders.py
Sushruth21's picture
Upload folder using huggingface_hub
e00c2a1 verified
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Task Graders for Energy & Memory RAM Optimization Environment.
This module defines explicit graders for each task that evaluate agent performance
on a 0.0-1.0 scale. Each grader calculates scores based on:
- RAM usage optimization (percentage reduction from baseline)
- Energy consumption optimization (kWh reduction)
- Efficiency within step limits
- Real-world optimization metrics
The graders are exposed through the TASK_GRADERS registry for easy discovery.
"""
from typing import Any, Callable, Dict, Optional

from he_demo.models import EnergyOptimizationObservation
# ============================================================================
# TASK 1: Basic RAM Reduction (Easy Level - Difficulty 1)
# ============================================================================
def task_1_basic_ram_reduction_grader(observation: EnergyOptimizationObservation) -> float:
    """
    Score Task 1 (Basic RAM Reduction) on a 0.0-1.0 scale.

    Goal: RAM usage at or below 70% and energy at or below 7.5 kWh, reached
    within 10 steps.

    The result is a weighted sum of three components:
    - RAM (40%): rises linearly from 0.0 at 100% usage to 1.0 at 70% or less.
    - Energy (40%): rises linearly from 0.0 at 10.0 kWh to 1.0 at 7.5 kWh or less.
    - Step efficiency (20%): 1.0 for up to 10 steps; every step beyond that
      removes 0.1, bottoming out at 0.0.

    Because normalization starts at 100% RAM / 10.0 kWh, an observation that
    is still above the targets earns partial credit.

    Args:
        observation: Observation exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        Composite score rounded to three decimal places.
    """
    def unit_interval(value):
        # Clamp a raw ratio into [0.0, 1.0].
        return max(0.0, min(1.0, value))

    # Worst-case reference points paired with the targets the task asks for.
    ram_worst, ram_goal = 100.0, 70.0
    energy_worst, energy_goal = 10.0, 7.5
    step_limit = 10

    ram_component = unit_interval(
        (ram_worst - observation.ram_usage) / (ram_worst - ram_goal)
    )
    energy_component = unit_interval(
        (energy_worst - observation.energy_consumption) / (energy_worst - energy_goal)
    )

    # Full marks inside the step budget, 10% off for each step past it.
    steps = observation.steps_taken
    pace_component = (
        1.0 if steps <= step_limit else max(0.0, 1.0 - (steps - step_limit) * 0.1)
    )

    weighted_total = (
        (ram_component * 0.4) + (energy_component * 0.4) + (pace_component * 0.2)
    )
    return round(weighted_total, 3)
# ============================================================================
# TASK 2: Energy Optimization (Medium Level - Difficulty 2)
# ============================================================================
def task_2_energy_optimization_grader(observation: EnergyOptimizationObservation) -> float:
    """
    Score Task 2 (Energy Optimization) on a 0.0-1.0 scale.

    Goal: energy at or below 6.0 kWh while RAM stays at or below 75%, reached
    within 15 steps.

    The result is a weighted sum of three components:
    - Energy (50%, primary objective): rises linearly from 0.0 at 10.0 kWh to
      1.0 at 6.0 kWh or less.
    - RAM constraint (25%): 1.0 while RAM is at or below 75%; above that it
      falls linearly and reaches 0.0 once RAM is 5 percentage points over.
    - Step efficiency (25%): 1.0 for up to 15 steps; every step beyond that
      removes 0.08, bottoming out at 0.0.

    Args:
        observation: Observation exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        Composite score rounded to three decimal places.
    """
    ram_ceiling = 75.0   # constraint, not an optimization target
    energy_goal = 6.0
    energy_worst = 10.0  # normalization reference for the energy score
    step_limit = 15

    # Primary objective: how far energy has dropped toward the goal.
    energy_ratio = (energy_worst - observation.energy_consumption) / (energy_worst - energy_goal)
    energy_component = max(0.0, min(1.0, energy_ratio))

    # Constraint: no penalty at or below the ceiling; the score is gone
    # entirely at 5 points above it.
    ram_now = observation.ram_usage
    if ram_now <= ram_ceiling:
        ram_component = 1.0
    else:
        ram_component = max(0.0, 1.0 - ((ram_now - ram_ceiling) / 5.0))

    # Full marks inside the step budget, 8% off for each step past it.
    steps = observation.steps_taken
    pace_component = (
        1.0 if steps <= step_limit else max(0.0, 1.0 - (steps - step_limit) * 0.08)
    )

    weighted_total = (
        (energy_component * 0.5) + (ram_component * 0.25) + (pace_component * 0.25)
    )
    return round(weighted_total, 3)
# ============================================================================
# TASK 3: Balanced Optimization (Hard Level - Difficulty 3)
# ============================================================================
def task_3_balanced_optimization_grader(observation: EnergyOptimizationObservation) -> float:
    """
    Score Task 3 (Balanced Optimization) on a 0.0-1.0 scale.

    Goal: RAM at or below 60% and energy at or below 5.0 kWh, reached within
    20 steps.

    Scoring:
    - RAM score: rises linearly from 0.0 at 100% usage to 1.0 at 60% or less.
    - Energy score: rises linearly from 0.0 at 10.0 kWh to 1.0 at 5.0 kWh or less.
    - Balance: the plain average of the two scores, contributing 90% of the
      result.
    - Step adjustment: within 20 steps, a bonus equal to 0.1 times the unused
      fraction of the step budget (at most +0.1); past 20 steps, a penalty of
      0.05 per extra step (at most -0.2).

    The sum is clamped to [0.0, 1.0]. Finishing exactly on the step limit
    earns no bonus, so the result tops out at 0.9 in that case.

    Args:
        observation: Observation exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        Composite score rounded to three decimal places.
    """
    def unit_interval(value):
        # Clamp a raw value into [0.0, 1.0].
        return max(0.0, min(1.0, value))

    ram_worst, ram_goal = 100.0, 60.0
    energy_worst, energy_goal = 10.0, 5.0
    step_limit = 20

    ram_component = unit_interval(
        (ram_worst - observation.ram_usage) / (ram_worst - ram_goal)
    )
    energy_component = unit_interval(
        (energy_worst - observation.energy_consumption) / (energy_worst - energy_goal)
    )

    # Both objectives carry the same weight.
    blended = (ram_component + energy_component) / 2.0

    steps = observation.steps_taken
    if steps <= step_limit:
        # Reward unused step budget, capped at +0.1.
        step_adjustment = min(0.1, (step_limit - steps) / step_limit * 0.1)
    else:
        # Charge 0.05 per extra step, capped at -0.2.
        step_adjustment = max(-0.2, -(steps - step_limit) * 0.05)

    return round(unit_interval((blended * 0.9) + step_adjustment), 3)
# ============================================================================
# TASK 4: Advanced Efficiency (Hard Level - Difficulty 4)
# ============================================================================
def task_4_advanced_efficiency_grader(observation: EnergyOptimizationObservation) -> float:
    """
    Score Task 4 (Advanced Efficiency) on a 0.0-1.0 scale.

    Goal: RAM at or below 50% and energy at or below 4.0 kWh, reached within
    25 steps.

    Scoring:
    - RAM score: rises linearly from 0.0 at 100% usage to 1.0 at 50% or less.
    - Energy score: rises linearly from 0.0 at 10.0 kWh to 1.0 at 4.0 kWh or less.
    - Balance: the plain average of the two scores, contributing 90% of the
      result.
    - Step adjustment: within 25 steps, a bonus equal to 0.1 times the unused
      fraction of the step budget (at most +0.1); past 25 steps, a penalty of
      0.05 per extra step (at most -0.2).

    The sum is clamped to [0.0, 1.0].

    Args:
        observation: Observation exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        Composite score rounded to three decimal places.
    """
    ram_worst, ram_goal = 100.0, 50.0
    energy_worst, energy_goal = 10.0, 4.0
    step_limit = 25

    ram_ratio = (ram_worst - observation.ram_usage) / (ram_worst - ram_goal)
    ram_component = max(0.0, min(1.0, ram_ratio))

    energy_ratio = (energy_worst - observation.energy_consumption) / (energy_worst - energy_goal)
    energy_component = max(0.0, min(1.0, energy_ratio))

    # Both objectives carry the same weight.
    blended = (ram_component + energy_component) / 2.0

    steps = observation.steps_taken
    step_adjustment = (
        min(0.1, (step_limit - steps) / step_limit * 0.1)   # unused budget bonus
        if steps <= step_limit
        else max(-0.2, -(steps - step_limit) * 0.05)        # overrun penalty
    )

    bounded_total = max(0.0, min(1.0, (blended * 0.9) + step_adjustment))
    return round(bounded_total, 3)
# ============================================================================
# TASK 5: Expert Optimization (Master Level - Difficulty 5)
# ============================================================================
def task_5_expert_optimization_grader(observation: EnergyOptimizationObservation) -> float:
    """
    Score Task 5 (Expert Optimization) on a 0.0-1.0 scale.

    Goal: RAM at or below 40% and energy at or below 3.0 kWh, reached within
    30 steps.

    Scoring:
    - RAM score: rises linearly from 0.0 at 100% usage to 1.0 at 40% or less.
    - Energy score: rises linearly from 0.0 at 10.0 kWh to 1.0 at 3.0 kWh or less.
    - Balance: 60% RAM score plus 40% energy score, contributing 90% of the
      result.
    - Step adjustment: within 30 steps, a bonus equal to 0.1 times the unused
      fraction of the step budget (at most +0.1); past 30 steps, a penalty of
      0.05 per extra step (at most -0.3).

    The sum is clamped to [0.0, 1.0].

    Args:
        observation: Observation exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        Composite score rounded to three decimal places.
    """
    def unit_interval(value):
        # Clamp a raw value into [0.0, 1.0].
        return max(0.0, min(1.0, value))

    ram_worst, ram_goal = 100.0, 40.0
    energy_worst, energy_goal = 10.0, 3.0
    step_limit = 30

    ram_component = unit_interval(
        (ram_worst - observation.ram_usage) / (ram_worst - ram_goal)
    )
    energy_component = unit_interval(
        (energy_worst - observation.energy_consumption) / (energy_worst - energy_goal)
    )

    # RAM is weighted more heavily than energy at this level.
    blended = (ram_component * 0.6) + (energy_component * 0.4)

    steps = observation.steps_taken
    if steps <= step_limit:
        # Reward unused step budget, capped at +0.1.
        step_adjustment = min(0.1, (step_limit - steps) / step_limit * 0.1)
    else:
        # Charge 0.05 per extra step, capped at -0.3.
        step_adjustment = max(-0.3, -(steps - step_limit) * 0.05)

    return round(unit_interval((blended * 0.9) + step_adjustment), 3)
# ============================================================================
# Registry and Metadata
# ============================================================================
# Explicit task grader mapping for validator tool detection
# Registry keyed by task name. Each entry pairs the grader callable with the
# descriptive metadata (targets, step budget, difficulty) for that task.
TASK_GRADERS: Dict[str, Dict[str, Any]] = {
    # Difficulty 1
    "basic_ram_reduction": {
        "grader": task_1_basic_ram_reduction_grader,
        "name": "basic_ram_reduction",
        "display_name": "Basic RAM Reduction",
        "difficulty": 1,
        "description": "Reduce RAM usage below 70%",
        "target_ram": 70.0,
        "target_energy": 7.5,
        "max_steps": 10,
        "category": "easy",
        "real_world_application": "Memory optimization for resource-constrained devices and edge computing",
    },
    # Difficulty 2
    "energy_optimization": {
        "grader": task_2_energy_optimization_grader,
        "name": "energy_optimization",
        "display_name": "Energy Optimization",
        "difficulty": 2,
        "description": "Reduce energy consumption below 6 kWh while maintaining RAM below 75%",
        "target_ram": 75.0,
        "target_energy": 6.0,
        "max_steps": 15,
        "category": "medium",
        "real_world_application": "Energy efficiency for data centers and cloud infrastructure",
    },
    # Difficulty 3
    "balanced_optimization": {
        "grader": task_3_balanced_optimization_grader,
        "name": "balanced_optimization",
        "display_name": "Balanced Optimization",
        "difficulty": 3,
        "description": "Balance RAM below 60% and energy below 5 kWh",
        "target_ram": 60.0,
        "target_energy": 5.0,
        "max_steps": 20,
        "category": "hard",
        "real_world_application": "Production system optimization with dual constraints",
    },
    # Difficulty 4
    "advanced_efficiency": {
        "grader": task_4_advanced_efficiency_grader,
        "name": "advanced_efficiency",
        "display_name": "Advanced Efficiency",
        "difficulty": 4,
        "description": "Achieve RAM below 50% and energy below 4 kWh",
        "target_ram": 50.0,
        "target_energy": 4.0,
        "max_steps": 25,
        "category": "hard",
        "real_world_application": "Highly constrained embedded systems and IoT devices",
    },
    # Difficulty 5
    "expert_optimization": {
        "grader": task_5_expert_optimization_grader,
        "name": "expert_optimization",
        "display_name": "Expert Optimization",
        "difficulty": 5,
        "description": "Master level: RAM below 40% and energy below 3 kWh",
        "target_ram": 40.0,
        "target_energy": 3.0,
        "max_steps": 30,
        "category": "expert",
        "real_world_application": "Mission-critical space, deep-sea probes, and highly scaled edge clusters",
    },
}
def get_grader(task_name: str) -> Callable:
    """
    Look up the grader function registered for a task.

    Args:
        task_name: Key of the task in ``TASK_GRADERS``.

    Returns:
        The callable stored under that task's ``"grader"`` entry.

    Raises:
        ValueError: If ``task_name`` is not a registered task; the message
            lists the available task names.
    """
    entry = TASK_GRADERS.get(task_name)
    if entry is None:
        known_tasks = list(TASK_GRADERS)
        raise ValueError(f"Unknown task: {task_name}. Available tasks: {known_tasks}")
    return entry["grader"]
def get_all_graders() -> Dict[str, Callable]:
    """
    Collect every registered grader function.

    Returns:
        A new dictionary mapping each task name in ``TASK_GRADERS`` to its
        grader callable, in registry order.
    """
    graders: Dict[str, Callable] = {}
    for registered_name, entry in TASK_GRADERS.items():
        graders[registered_name] = entry["grader"]
    return graders
def get_grader_metadata(task_name: Optional[str] = None) -> Dict[str, Any]:
    """
    Return task metadata with the grader callables stripped out.

    The ``"grader"`` entry is dropped from every result so the returned
    dictionaries hold only descriptive fields (intended for JSON
    serialization).

    Args:
        task_name: Key of a single task in ``TASK_GRADERS``, or None to get
            the metadata of every task.

    Returns:
        For a specific task: that task's metadata dictionary.
        For None: a dictionary mapping each task name to its metadata
        dictionary.

    Raises:
        ValueError: If ``task_name`` is given but is not a registered task.
            An empty string counts as an unknown task, not as "all tasks".
    """
    def without_grader(entry: Dict[str, Any]) -> Dict[str, Any]:
        # Copy the entry, leaving out the non-serializable callable.
        return {key: value for key, value in entry.items() if key != "grader"}

    # Compare against None explicitly: a truthiness test would treat "" (or
    # any other falsy name) as a request for all tasks instead of rejecting it.
    if task_name is None:
        return {name: without_grader(entry) for name, entry in TASK_GRADERS.items()}

    if task_name not in TASK_GRADERS:
        raise ValueError(f"Unknown task: {task_name}")
    return without_grader(TASK_GRADERS[task_name])
if __name__ == "__main__":
    # When run as a script, print a readable catalogue of the registered tasks.
    print("Available Task Graders:")
    print("=" * 80)
    for task_name, metadata in TASK_GRADERS.items():
        # Assemble the per-task summary first, then emit it in one call; the
        # leading "\n" on the title keeps a blank line between tasks.
        summary = (
            f"\n{metadata['display_name']} (Difficulty {metadata['difficulty']})",
            f" Name: {task_name}",
            f" Description: {metadata['description']}",
            f" Targets: RAM < {metadata['target_ram']}%, Energy < {metadata['target_energy']} kWh",
            f" Max Steps: {metadata['max_steps']}",
            f" Real-world: {metadata['real_world_application']}",
        )
        print("\n".join(summary))