Spaces:
Runtime error
Runtime error
| """ | |
| Episode Tier Lock — ensures both baseline and specialist output are scored | |
| through the SAME tier. Prevents the circular dependency bug from v3. | |
| """ | |
| from __future__ import annotations | |
| import random | |
| from enum import IntEnum | |
| from dataclasses import dataclass | |
class RewardTier(IntEnum):
    """Reward scoring tiers, numbered 0 through 3."""

    # Structural checks (free).
    TIER_0 = 0
    # Embedding similarity.
    TIER_1 = 1
    # Small LLM micro-judge (GPT-4o-mini).
    TIER_2 = 2
    # Full LLM-as-judge, used for checkpoints only.
    TIER_3 = 3
def _load_tier_config() -> tuple[dict, dict]:
    """Load tier_map and tier2_sample_rates from training_config.yaml at import time.

    Reads the ``reward`` section of ``../configs/training_config.yaml``
    (resolved relative to this file). A missing ``tier_map`` or
    ``tier2_sample_rates`` key is filled in with the built-in defaults.

    This loader is best-effort: if PyYAML is not installed, the file is
    missing or unreadable, or its contents are malformed (for example an
    integer that is not a valid ``RewardTier``), the built-in defaults are
    returned for both mappings and a warning is logged.

    Returns:
        A ``(tier_map, sample_rates)`` pair. ``tier_map`` maps a complexity
        class name to a ``RewardTier``; ``sample_rates`` maps a complexity
        class name to its tier-2 sample rate.
    """
    import logging
    import os

    # Single source of truth for the defaults, used both for keys missing
    # from the config and for the whole-config fallback.
    default_tier_map = {
        "atomic": 0, "simple": 1, "moderate": 1, "complex": 2, "enterprise": 2,
    }
    default_sample_rates = {"moderate": 0.30, "complex": 1.00, "enterprise": 1.00}

    config_path = os.path.join(
        os.path.dirname(__file__), "..", "configs", "training_config.yaml"
    )
    try:
        # Imported inside the try so that a missing PyYAML install falls back
        # to the defaults instead of crashing the module import.
        import yaml

        with open(config_path, encoding="utf-8") as f:
            # safe_load returns None for an empty document.
            config = yaml.safe_load(f) or {}
        # ``reward:`` with no value parses to None; treat it as empty.
        reward_cfg = config.get("reward") or {}
        tier_map_raw = reward_cfg.get("tier_map", default_tier_map)
        tier_map = {k: RewardTier(v) for k, v in tier_map_raw.items()}
        sample_rates = reward_cfg.get("tier2_sample_rates", dict(default_sample_rates))
        return tier_map, sample_rates
    except Exception as exc:
        # Deliberately broad: any load problem must not block import, but it
        # should not pass unnoticed either.
        logging.getLogger(__name__).warning(
            "Could not load tier config from %s (%s: %s); using built-in defaults",
            config_path, type(exc).__name__, exc,
        )
        return (
            {k: RewardTier(v) for k, v in default_tier_map.items()},
            dict(default_sample_rates),
        )
# Tier configuration, resolved once when this module is imported.
(TIER_MAP, TIER2_SAMPLE_RATES) = _load_tier_config()
@dataclass
class EpisodeTierLock:
    """
    Locked once at episode start. Both generalist and specialist outputs
    are scored through this exact tier. No drift.

    Attributes:
        complexity_class: Complexity label the lock was built for.
        locked_tier: Tier selected for the episode.
        tier2_sample_rate: Sample rate looked up for ``complexity_class``
            in ``TIER2_SAMPLE_RATES`` (0.0 when none is configured).
    """

    complexity_class: str
    locked_tier: RewardTier
    tier2_sample_rate: float

    @classmethod
    def for_task(cls, complexity_class: str) -> "EpisodeTierLock":
        """Build the lock for a task of the given complexity class.

        The base tier comes from ``TIER_MAP``; an unknown class falls back to
        ``RewardTier.TIER_1``. A ``"moderate"`` task is escalated to
        ``RewardTier.TIER_2`` with probability equal to its configured
        sample rate.

        Args:
            complexity_class: Complexity label of the task.

        Returns:
            A new ``EpisodeTierLock`` carrying the selected tier.
        """
        tier = TIER_MAP.get(complexity_class, RewardTier.TIER_1)
        sample_rate = TIER2_SAMPLE_RATES.get(complexity_class, 0.0)
        # Only "moderate" is sampled; the string check short-circuits, so no
        # random number is drawn for any other class.
        if complexity_class == "moderate" and random.random() < sample_rate:
            tier = RewardTier.TIER_2
        return cls(
            complexity_class=complexity_class,
            locked_tier=tier,
            tier2_sample_rate=sample_rate,
        )