""" env/curriculum.py — Self-Improving Curriculum Generator Tracks agent performance and auto-generates harder scenarios as the agent improves. This is Theme 4: Self-Improvement. """ import json import random from typing import Optional class CurriculumGenerator: """ Adaptive curriculum that gets harder as the agent improves. Tracks rolling average score. When agent consistently scores > threshold, upgrades difficulty tier automatically. This is what judges see as 'self-improvement': - Agent improves → environment generates harder scenarios - Harder scenarios → agent must improve further - Cycle continues → genuine lifelong learning signal """ # Thresholds to advance difficulty ADVANCE_THRESHOLD = 0.75 # Score needed to advance tier ADVANCE_WINDOW = 5 # Episodes to average over REGRESS_THRESHOLD = 0.30 # Score to drop back a tier def __init__(self): self.episode_scores: list[float] = [] self.current_tier: int = 0 # 0=easy, 1=medium, 2=hard, 3=ultra self.tier_names = ["easy", "medium", "hard", "ultra"] self.episodes_run = 0 self.tier_history: list[dict] = [] def record_episode(self, score: float) -> dict: """ Record episode score and check if tier should change. Returns dict with current tier and any tier change info. """ self.episode_scores.append(score) self.episodes_run += 1 # Keep rolling window if len(self.episode_scores) > 20: self.episode_scores = self.episode_scores[-20:] result = { "score": score, "current_tier": self.tier_names[self.current_tier], "tier_changed": False, "message": "", } # Check advance if len(self.episode_scores) >= self.ADVANCE_WINDOW: recent_avg = sum(self.episode_scores[-self.ADVANCE_WINDOW:]) / self.ADVANCE_WINDOW if recent_avg >= self.ADVANCE_THRESHOLD and self.current_tier < 3: self.current_tier += 1 result["tier_changed"] = True result["message"] = ( f"🎯 Tier advanced to {self.tier_names[self.current_tier]}! " f"Avg score {recent_avg:.2f} >= {self.ADVANCE_THRESHOLD}" ) self.tier_history.append({ "episode": self.episodes_run, "direction": "advance", "new_tier": self.tier_names[self.current_tier], "avg_score": recent_avg, }) elif recent_avg < self.REGRESS_THRESHOLD and self.current_tier > 0: self.current_tier -= 1 result["tier_changed"] = True result["message"] = ( f"📉 Tier dropped to {self.tier_names[self.current_tier]}. " f"Avg score {recent_avg:.2f} < {self.REGRESS_THRESHOLD}" ) self.tier_history.append({ "episode": self.episodes_run, "direction": "regress", "new_tier": self.tier_names[self.current_tier], "avg_score": recent_avg, }) result["current_tier"] = self.tier_names[self.current_tier] return result def get_next_scenario_difficulty(self) -> str: """Returns the difficulty string for the next episode.""" return self.tier_names[min(self.current_tier, 2)] # cap at hard def generate_ultra_scenario(self) -> dict: """ Generate an 'ultra hard' scenario dynamically for tier 3. More tables, more slow queries, tighter budget, conflicting constraints. """ n_tables = random.randint(5, 8) n_queries = random.randint(4, 6) max_steps = random.randint(30, 40) # Tight budget target = random.uniform(65.0, 72.0) table_names = random.sample([ "orders", "users", "products", "transactions", "events", "sessions", "logs", "notifications", "payments", "shipments" ], n_tables) tables = [] for name in table_names: tables.append({ "name": name, "rows": random.randint(100000, 2000000), "indexes": ["PRIMARY"], "size_mb": random.randint(200, 5000), }) slow_queries = [] for i in range(n_queries): t1, t2 = random.sample(table_names, 2) slow_queries.append({ "id": f"q{i+1}", "sql": f"SELECT * FROM {t1} WHERE user_id=? AND status=? AND created_at > ?", "avg_ms": random.randint(8000, 30000), "main_table": t1, "rows_examined": random.randint(100000, 2000000), }) return { "id": f"ultra_{random.randint(1000, 9999)}", "description": f"Ultra: {n_tables}-table DB, {n_queries} slow queries, {max_steps}-step budget.", "tables": tables, "slow_queries": slow_queries, "missing_index_hints": [], # No hints for ultra! "performance_score_baseline": round(random.uniform(2.0, 8.0), 1), "target_score": round(target, 1), "max_steps": max_steps, "category": "ultra", } def get_stats(self) -> dict: """Returns curriculum stats for /progress endpoint.""" recent = self.episode_scores[-5:] if self.episode_scores else [] return { "current_tier": self.tier_names[self.current_tier], "episodes_run": self.episodes_run, "recent_avg": round(sum(recent) / max(len(recent), 1), 3), "all_time_avg": round(sum(self.episode_scores) / max(len(self.episode_scores), 1), 3), "tier_history": self.tier_history[-5:], "advance_at": self.ADVANCE_THRESHOLD, } # Singleton curriculum = CurriculumGenerator()