# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Task Graders for Energy & Memory RAM Optimization Environment.
This module defines explicit graders for each task that evaluate agent performance
on a 0.0-1.0 scale. Each grader calculates scores based on:
- RAM usage optimization (percentage reduction from baseline)
- Energy consumption optimization (kWh reduction)
- Efficiency within step limits
- Real-world optimization metrics
The graders are exposed through the TASK_GRADERS registry for easy discovery.
"""
from typing import Any, Callable, Dict, Optional

from he_demo.models import EnergyOptimizationObservation
# ============================================================================
# TASK 1: Basic RAM Reduction (Easy Level - Difficulty 1)
# ============================================================================
def task_1_basic_ram_reduction_grader(observation: EnergyOptimizationObservation) -> float:
    """
    Score Task 1 (Basic RAM Reduction) on a 0.0-1.0 scale.

    Goal: bring RAM usage down to 70% and energy consumption down to
    7.5 kWh, using no more than 10 steps.

    Scoring:
        - RAM (weight 0.4): linear from 0.0 at 100% usage to 1.0 at 70%,
          clamped to [0.0, 1.0].
        - Energy (weight 0.4): linear from 0.0 at 10.0 kWh to 1.0 at
          7.5 kWh, clamped to [0.0, 1.0].
        - Step efficiency (weight 0.2): 1.0 for up to 10 steps, then 0.1 is
          deducted per extra step, never dropping below 0.0.

    Args:
        observation: Object exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        The weighted sum of the three components, rounded to 3 decimals.
    """
    goal_ram, goal_energy, step_budget = 70.0, 7.5, 10
    # Normalisation anchors: the values at which a component scores 0.0.
    worst_ram, worst_energy = 100.0, 10.0

    ram_progress = (worst_ram - observation.ram_usage) / (worst_ram - goal_ram)
    ram_part = max(0.0, min(1.0, ram_progress))

    energy_progress = (worst_energy - observation.energy_consumption) / (worst_energy - goal_energy)
    energy_part = max(0.0, min(1.0, energy_progress))

    # Full marks inside the step budget; 10% off for each step beyond it.
    pace_part = (
        1.0
        if observation.steps_taken <= step_budget
        else max(0.0, 1.0 - (observation.steps_taken - step_budget) * 0.1)
    )

    total = (ram_part * 0.4) + (energy_part * 0.4) + (pace_part * 0.2)
    return round(total, 3)
# ============================================================================
# TASK 2: Energy Optimization (Medium Level - Difficulty 2)
# ============================================================================
def task_2_energy_optimization_grader(observation: EnergyOptimizationObservation) -> float:
    """
    Score Task 2 (Energy Optimization) on a 0.0-1.0 scale.

    Goal: bring energy consumption down to 6 kWh while RAM usage stays at
    or below 75%, using no more than 15 steps.

    Scoring:
        - Energy (weight 0.5): linear from 0.0 at 10.0 kWh to 1.0 at
          6.0 kWh, clamped to [0.0, 1.0].
        - RAM constraint (weight 0.25): 1.0 while RAM is at or below 75%;
          above that it falls linearly and reaches 0.0 at 5 points over.
        - Step efficiency (weight 0.25): 1.0 for up to 15 steps, then 0.08
          is deducted per extra step, never dropping below 0.0.

    Args:
        observation: Object exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        The weighted sum of the three components, rounded to 3 decimals.
    """
    ram_ceiling, goal_energy, step_budget = 75.0, 6.0, 15
    worst_energy = 10.0  # energy level at which the energy component is 0.0

    energy_progress = (worst_energy - observation.energy_consumption) / (worst_energy - goal_energy)
    energy_part = max(0.0, min(1.0, energy_progress))

    # RAM is a constraint here, not an objective: only overshoot is punished.
    ram_part = (
        1.0
        if observation.ram_usage <= ram_ceiling
        else max(0.0, 1.0 - ((observation.ram_usage - ram_ceiling) / 5.0))
    )

    pace_part = (
        1.0
        if observation.steps_taken <= step_budget
        else max(0.0, 1.0 - (observation.steps_taken - step_budget) * 0.08)
    )

    total = (energy_part * 0.5) + (ram_part * 0.25) + (pace_part * 0.25)
    return round(total, 3)
# ============================================================================
# TASK 3: Balanced Optimization (Hard Level - Difficulty 3)
# ============================================================================
def task_3_balanced_optimization_grader(observation: EnergyOptimizationObservation) -> float:
    """
    Score Task 3 (Balanced Optimization) on a 0.0-1.0 scale.

    Goal: bring RAM usage down to 60% and energy consumption down to
    5 kWh, using no more than 20 steps.

    Scoring:
        - RAM: linear from 0.0 at 100% usage to 1.0 at 60%, clamped.
        - Energy: linear from 0.0 at 10 kWh to 1.0 at 5 kWh, clamped.
        - Balance: plain average of the RAM and energy components,
          contributing 90% of the final score.
        - Step adjustment: a bonus of up to +0.1 that grows with the number
          of unused steps, or a penalty of 0.05 per extra step capped
          at -0.2.

    Args:
        observation: Object exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        The combined score clamped to [0.0, 1.0] and rounded to 3 decimals.
    """
    goal_ram, goal_energy, step_budget = 60.0, 5.0, 20
    worst_ram, worst_energy = 100.0, 10.0

    ram_part = max(0.0, min(1.0, (worst_ram - observation.ram_usage) / (worst_ram - goal_ram)))
    energy_part = max(
        0.0,
        min(1.0, (worst_energy - observation.energy_consumption) / (worst_energy - goal_energy)),
    )

    # Both objectives carry equal weight.
    balance = (ram_part + energy_part) / 2.0

    # Reward unused steps, punish overruns.
    step_adjustment = (
        min(0.1, (step_budget - observation.steps_taken) / step_budget * 0.1)
        if observation.steps_taken <= step_budget
        else max(-0.2, -(observation.steps_taken - step_budget) * 0.05)
    )

    total = max(0.0, min(1.0, (balance * 0.9) + step_adjustment))
    return round(total, 3)
# ============================================================================
# TASK 4: Advanced Efficiency (Hard Level - Difficulty 4)
# ============================================================================
def task_4_advanced_efficiency_grader(observation: EnergyOptimizationObservation) -> float:
    """
    Score Task 4 (Advanced Efficiency) on a 0.0-1.0 scale.

    Goal: bring RAM usage down to 50% and energy consumption down to
    4 kWh, using no more than 25 steps.

    Scoring:
        - RAM: linear from 0.0 at 100% usage to 1.0 at 50%, clamped.
        - Energy: linear from 0.0 at 10 kWh to 1.0 at 4 kWh, clamped.
        - Balance: plain average of the two components, contributing 90%
          of the final score.
        - Step adjustment: a bonus of up to +0.1 for unused steps, or a
          penalty of 0.05 per extra step capped at -0.2.

    Args:
        observation: Object exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        The combined score clamped to [0.0, 1.0] and rounded to 3 decimals.
    """
    goal_ram, goal_energy, step_budget = 50.0, 4.0, 25
    worst_ram, worst_energy = 100.0, 10.0

    ram_part = max(0.0, min(1.0, (worst_ram - observation.ram_usage) / (worst_ram - goal_ram)))
    energy_part = max(
        0.0,
        min(1.0, (worst_energy - observation.energy_consumption) / (worst_energy - goal_energy)),
    )
    balance = (ram_part + energy_part) / 2.0

    # Reward unused steps, punish overruns.
    step_adjustment = (
        min(0.1, (step_budget - observation.steps_taken) / step_budget * 0.1)
        if observation.steps_taken <= step_budget
        else max(-0.2, -(observation.steps_taken - step_budget) * 0.05)
    )

    total = max(0.0, min(1.0, (balance * 0.9) + step_adjustment))
    return round(total, 3)
# ============================================================================
# TASK 5: Expert Optimization (Master Level - Difficulty 5)
# ============================================================================
def task_5_expert_optimization_grader(observation: EnergyOptimizationObservation) -> float:
    """
    Score Task 5 (Expert Optimization) on a 0.0-1.0 scale.

    Goal: bring RAM usage down to 40% and energy consumption down to
    3 kWh, using no more than 30 steps.

    Scoring:
        - RAM: linear from 0.0 at 100% usage to 1.0 at 40%, clamped.
        - Energy: linear from 0.0 at 10 kWh to 1.0 at 3 kWh, clamped.
        - Balance: 60% RAM plus 40% energy, contributing 90% of the final
          score.
        - Step adjustment: a bonus of up to +0.1 for unused steps, or a
          penalty of 0.05 per extra step capped at -0.3.

    Args:
        observation: Object exposing ``ram_usage``, ``energy_consumption``
            and ``steps_taken``.

    Returns:
        The combined score clamped to [0.0, 1.0] and rounded to 3 decimals.
    """
    goal_ram, goal_energy, step_budget = 40.0, 3.0, 30
    worst_ram, worst_energy = 100.0, 10.0

    ram_part = max(0.0, min(1.0, (worst_ram - observation.ram_usage) / (worst_ram - goal_ram)))
    energy_part = max(
        0.0,
        min(1.0, (worst_energy - observation.energy_consumption) / (worst_energy - goal_energy)),
    )

    # Unlike the lower tiers, RAM outweighs energy here.
    balance = (ram_part * 0.6) + (energy_part * 0.4)

    # Reward unused steps, punish overruns (harsher cap than tasks 3 and 4).
    step_adjustment = (
        min(0.1, (step_budget - observation.steps_taken) / step_budget * 0.1)
        if observation.steps_taken <= step_budget
        else max(-0.3, -(observation.steps_taken - step_budget) * 0.05)
    )

    total = max(0.0, min(1.0, (balance * 0.9) + step_adjustment))
    return round(total, 3)
# ============================================================================
# Registry and Metadata
# ============================================================================
# Explicit task grader mapping for validator tool detection
# Registry keyed by task name. Each entry bundles the grader callable with
# descriptive metadata: display name, difficulty, targets, step budget,
# category and a real-world application blurb.
TASK_GRADERS: Dict[str, Dict[str, Any]] = dict(
    basic_ram_reduction=dict(
        grader=task_1_basic_ram_reduction_grader,
        name="basic_ram_reduction",
        display_name="Basic RAM Reduction",
        difficulty=1,
        description="Reduce RAM usage below 70%",
        target_ram=70.0,
        target_energy=7.5,
        max_steps=10,
        category="easy",
        real_world_application="Memory optimization for resource-constrained devices and edge computing",
    ),
    energy_optimization=dict(
        grader=task_2_energy_optimization_grader,
        name="energy_optimization",
        display_name="Energy Optimization",
        difficulty=2,
        description="Reduce energy consumption below 6 kWh while maintaining RAM below 75%",
        target_ram=75.0,
        target_energy=6.0,
        max_steps=15,
        category="medium",
        real_world_application="Energy efficiency for data centers and cloud infrastructure",
    ),
    balanced_optimization=dict(
        grader=task_3_balanced_optimization_grader,
        name="balanced_optimization",
        display_name="Balanced Optimization",
        difficulty=3,
        description="Balance RAM below 60% and energy below 5 kWh",
        target_ram=60.0,
        target_energy=5.0,
        max_steps=20,
        category="hard",
        real_world_application="Production system optimization with dual constraints",
    ),
    advanced_efficiency=dict(
        grader=task_4_advanced_efficiency_grader,
        name="advanced_efficiency",
        display_name="Advanced Efficiency",
        difficulty=4,
        description="Achieve RAM below 50% and energy below 4 kWh",
        target_ram=50.0,
        target_energy=4.0,
        max_steps=25,
        category="hard",
        real_world_application="Highly constrained embedded systems and IoT devices",
    ),
    expert_optimization=dict(
        grader=task_5_expert_optimization_grader,
        name="expert_optimization",
        display_name="Expert Optimization",
        difficulty=5,
        description="Master level: RAM below 40% and energy below 3 kWh",
        target_ram=40.0,
        target_energy=3.0,
        max_steps=30,
        category="expert",
        real_world_application="Mission-critical space, deep-sea probes, and highly scaled edge clusters",
    ),
)
def get_grader(task_name: str) -> Callable:
    """
    Look up the grader function registered for a task.

    Args:
        task_name: Key of the task in ``TASK_GRADERS``.

    Returns:
        The callable stored under that task's ``"grader"`` key.

    Raises:
        ValueError: If ``task_name`` is not a registered task; the message
            lists the available task names.
    """
    if task_name in TASK_GRADERS:
        return TASK_GRADERS[task_name]["grader"]
    raise ValueError(f"Unknown task: {task_name}. Available tasks: {list(TASK_GRADERS.keys())}")
def get_all_graders() -> Dict[str, Callable]:
    """
    Collect every registered grader.

    Returns:
        A new dictionary mapping each task name in ``TASK_GRADERS`` to the
        callable stored under its ``"grader"`` key, in registry order.
    """
    graders: Dict[str, Callable] = {}
    for name, entry in TASK_GRADERS.items():
        graders[name] = entry["grader"]
    return graders
def get_grader_metadata(task_name: Optional[str] = None) -> Dict[str, Any]:
    """
    Return registry metadata with the grader callables stripped out.

    The ``"grader"`` entry is removed so the result contains only the
    plain values stored in ``TASK_GRADERS`` (useful for JSON serialization).

    Args:
        task_name: Name of a single task, or ``None`` to get the metadata
            of every task. Only ``None`` selects "all tasks"; any other
            value, including an empty string, is treated as a task name.

    Returns:
        For a single task: a dict of that task's metadata fields.
        For ``None``: a dict mapping each task name to its metadata dict.

    Raises:
        ValueError: If ``task_name`` is given but is not a registered task.
    """

    def _without_grader(entry: Dict[str, Any]) -> Dict[str, Any]:
        # Copy the entry minus the callable.
        return {key: value for key, value in entry.items() if key != "grader"}

    # Compare against None explicitly: a falsy name such as "" must be
    # reported as unknown rather than silently returning the whole registry.
    if task_name is None:
        return {name: _without_grader(entry) for name, entry in TASK_GRADERS.items()}

    if task_name not in TASK_GRADERS:
        raise ValueError(f"Unknown task: {task_name}")
    return _without_grader(TASK_GRADERS[task_name])
if __name__ == "__main__":
    # Manual smoke check: print a human-readable summary of every
    # registered task when the module is run as a script.
    print("Available Task Graders:")
    print("=" * 80)
    for task_name, metadata in TASK_GRADERS.items():
        summary_lines = (
            f"\n{metadata['display_name']} (Difficulty {metadata['difficulty']})",
            f" Name: {task_name}",
            f" Description: {metadata['description']}",
            f" Targets: RAM < {metadata['target_ram']}%, Energy < {metadata['target_energy']} kWh",
            f" Max Steps: {metadata['max_steps']}",
            f" Real-world: {metadata['real_world_application']}",
        )
        for line in summary_lines:
            print(line)
|