from __future__ import annotations from typing import List, Optional, Tuple _TIERS = ["easy", "medium", "hard"] _DEFAULT_THRESHOLDS = { "easy": 4.0, # promote easy→medium when 10-ep avg >= 4.0 "medium": 3.5, # promote medium→hard when 10-ep avg >= 3.5 } _WINDOW = 10 class CurriculumController: def __init__( self, start_tier: str = "easy", thresholds: Optional[dict] = None, ) -> None: self._tier = start_tier self._thresholds = thresholds if thresholds is not None else dict(_DEFAULT_THRESHOLDS) self._episode_idx = 0 self._history: List[Tuple[int, str, float]] = [] self.promotion_log: List[Tuple[int, str]] = [] def after_episode(self, total_reward: float) -> Optional[str]: self._history.append((self._episode_idx, self._tier, total_reward)) self._episode_idx += 1 recent = [r for _, t, r in self._history[-_WINDOW:] if t == self._tier] if len(recent) < _WINDOW: return None avg = sum(recent) / len(recent) tier_idx = _TIERS.index(self._tier) # Promote promote_threshold = self._thresholds.get(self._tier) if promote_threshold is not None and avg >= promote_threshold: if tier_idx < len(_TIERS) - 1: new_tier = _TIERS[tier_idx + 1] self._tier = new_tier self.promotion_log.append((self._episode_idx - 1, new_tier)) return new_tier # Demote if tier_idx > 0: prev_tier = _TIERS[tier_idx - 1] demote_threshold = self._thresholds.get(prev_tier) if demote_threshold is not None and avg < demote_threshold * 0.5: self._tier = prev_tier self.promotion_log.append((self._episode_idx - 1, prev_tier)) return prev_tier return None def get_tier(self) -> str: return self._tier def get_history(self) -> List[Tuple[int, str, float]]: return list(self._history)