Spaces:
Sleeping
Sleeping
| """ | |
| tasks/task_easy.py -- Easy task: 2 variables, low noise. | |
| The agent must discover a single causal relationship between two abstract | |
| variables in a low-noise setting with a generous budget. | |
| Grader returns 0.0-1.0 based on: | |
| - Hypothesis accuracy (60%) | |
| - Efficiency bonus (20%) | |
| - Calibration (20%) | |
| """ | |
| from __future__ import annotations | |
| from typing import Any | |
# Static definition of the "easy" task: identifiers, human-readable text,
# and the keyword arguments stored under "reset_kwargs".
TASK_EASY = {
    "id": "easy",
    "name": "Easy -- Single-Edge Discovery",
    "description": (
        "Discover the causal relationship "
        "between two abstract variables. "
        "Low noise (sigma=0.05), "
        "generous budget (12 steps)."
    ),
    "difficulty": "easy",
    # NOTE(review): presumably forwarded to the environment's reset call --
    # confirm against the caller; this file only declares the values.
    "reset_kwargs": {
        "noise_level": "low",
        "domain": "system_alpha",
        "seed": 42,  # fixed seed value
    },
}
def grade_easy(episode_result: dict[str, Any]) -> float:
    """
    Grade an easy-task episode. Returns a score in [0.0, 1.0].

    The score is a weighted sum of three rubric components: accuracy (60%),
    efficiency (20%) and calibration (20%). Each component is capped at its
    full weight; the sum is then clamped to [0.0, 1.0] and rounded to four
    decimal places.

    Args:
        episode_result: Dict of rubric outputs. The keys read here are:
            - accuracy_score (float): used as-is, capped at 1.0
            - efficiency_bonus (float): divided by 0.15, capped at 1.0
            - calibration_score (float): divided by 0.20, capped at 1.0
            A key that is missing, ``None`` or NaN counts as 0.0.
            ``total_episode_reward`` may be present but is not read by
            this function.

    Returns:
        Normalized score between 0.0 and 1.0, rounded to 4 decimals.
    """

    def component(key: str) -> float:
        """Fetch one rubric component, mapping missing/None/NaN to 0.0."""
        value = episode_result.get(key, 0.0)
        # NaN is the only value that is not equal to itself. It has to be
        # rejected explicitly: every comparison with NaN is False, so a NaN
        # sum would pass through min(1.0, raw) below as a perfect 1.0.
        if value is None or value != value:
            return 0.0
        return value

    accuracy = component("accuracy_score")
    efficiency = component("efficiency_bonus")
    calibration = component("calibration_score")

    # NOTE(review): 0.15 and 0.20 look like the rubric's maximum values for
    # the efficiency and calibration components -- confirm against the rubric.
    raw = (
        0.60 * min(accuracy, 1.0)
        + 0.20 * min(efficiency / 0.15, 1.0)
        + 0.20 * min(calibration / 0.20, 1.0)
    )
    # Negative components are allowed to pull the sum down; only the final
    # result is clamped to the [0.0, 1.0] range.
    return round(max(0.0, min(1.0, raw)), 4)