Spaces:
Sleeping
Sleeping
| """ | |
| env/curriculum.py — Self-Improving Curriculum Generator | |
| Tracks agent performance and auto-generates harder scenarios | |
| as the agent improves. This is Theme 4: Self-Improvement. | |
| """ | |
| import json | |
| import random | |
| from typing import Optional | |
class CurriculumGenerator:
    """
    Adaptive curriculum that gets harder as the agent improves.

    Tracks a rolling average of episode scores. When the average over the
    last ``ADVANCE_WINDOW`` episodes played at the current tier reaches
    ``ADVANCE_THRESHOLD`` the tier is raised by one; when it falls below
    ``REGRESS_THRESHOLD`` the tier is lowered by one.

    This is what judges see as 'self-improvement':
    - Agent improves → environment generates harder scenarios
    - Harder scenarios → agent must improve further
    - Cycle continues → genuine lifelong learning signal
    """

    # Thresholds to advance / regress difficulty
    ADVANCE_THRESHOLD = 0.75  # Rolling average needed to advance a tier
    ADVANCE_WINDOW = 5  # Episodes to average over
    REGRESS_THRESHOLD = 0.30  # Rolling average below which a tier is dropped
    MAX_HISTORY = 20  # Most recent scores kept in ``episode_scores``

    # Names the ultra scenario generator samples its tables from.
    _TABLE_POOL = (
        "orders", "users", "products", "transactions", "events",
        "sessions", "logs", "notifications", "payments", "shipments",
    )

    def __init__(self):
        self.episode_scores: list[float] = []  # last MAX_HISTORY scores only
        self.current_tier: int = 0  # 0=easy, 1=medium, 2=hard, 3=ultra
        self.tier_names = ["easy", "medium", "hard", "ultra"]
        self.episodes_run = 0
        self.tier_history: list[dict] = []
        # Running sum of every recorded score; episode_scores is truncated,
        # so it cannot be used for a true all-time average.
        self._score_total = 0.0
        # Episodes recorded since the last tier change (or since creation).
        self._episodes_at_tier = 0

    def record_episode(self, score: float) -> dict:
        """
        Record an episode score and check whether the tier should change.

        A tier change is only considered once ``ADVANCE_WINDOW`` episodes
        have been recorded since the previous change, so the average that
        drives the decision never mixes in scores from another tier and a
        single good (or bad) streak cannot cascade through several tiers.

        Args:
            score: Score achieved in the episode that just finished.

        Returns:
            Dict with keys ``score``, ``current_tier`` (tier name after any
            change), ``tier_changed`` (bool) and ``message`` (empty string
            when the tier did not change).
        """
        self.episode_scores.append(score)
        self.episodes_run += 1
        self._score_total += score
        self._episodes_at_tier += 1

        # Keep rolling window
        if len(self.episode_scores) > self.MAX_HISTORY:
            self.episode_scores = self.episode_scores[-self.MAX_HISTORY:]

        result = {
            "score": score,
            "current_tier": self.tier_names[self.current_tier],
            "tier_changed": False,
            "message": "",
        }

        if self._episodes_at_tier >= self.ADVANCE_WINDOW:
            window = self.episode_scores[-self.ADVANCE_WINDOW:]
            recent_avg = sum(window) / len(window)
            top_tier = len(self.tier_names) - 1
            if recent_avg >= self.ADVANCE_THRESHOLD and self.current_tier < top_tier:
                self._shift_tier(1, recent_avg, result)
            elif recent_avg < self.REGRESS_THRESHOLD and self.current_tier > 0:
                self._shift_tier(-1, recent_avg, result)

        result["current_tier"] = self.tier_names[self.current_tier]
        return result

    def _shift_tier(self, step: int, recent_avg: float, result: dict) -> None:
        """Move the tier by *step* (+1 or -1) and log it in *result* and ``tier_history``."""
        self.current_tier += step
        self._episodes_at_tier = 0  # restart the evaluation window at the new tier
        new_name = self.tier_names[self.current_tier]
        if step > 0:
            direction = "advance"
            message = (
                f"🎯 Tier advanced to {new_name}! "
                f"Avg score {recent_avg:.2f} >= {self.ADVANCE_THRESHOLD}"
            )
        else:
            direction = "regress"
            message = (
                f"📉 Tier dropped to {new_name}. "
                f"Avg score {recent_avg:.2f} < {self.REGRESS_THRESHOLD}"
            )
        result["tier_changed"] = True
        result["message"] = message
        self.tier_history.append({
            "episode": self.episodes_run,
            "direction": direction,
            "new_tier": new_name,
            "avg_score": recent_avg,
        })

    def get_next_scenario_difficulty(self) -> str:
        """Returns the difficulty string for the next episode.

        The result is capped at ``"hard"``: while the tier is ``ultra`` this
        still returns ``"hard"``.
        """
        return self.tier_names[min(self.current_tier, 2)]  # cap at hard

    def generate_ultra_scenario(self) -> dict:
        """
        Generate an 'ultra hard' scenario dynamically for tier 3.

        Builds 5-8 randomly sized tables that only have a PRIMARY index,
        4-6 slow queries each targeting one of those tables, a 30-40 step
        budget and no missing-index hints.

        Returns:
            Scenario dict with keys ``id``, ``description``, ``tables``,
            ``slow_queries``, ``missing_index_hints``,
            ``performance_score_baseline``, ``target_score``, ``max_steps``
            and ``category``.
        """
        n_tables = random.randint(5, 8)
        n_queries = random.randint(4, 6)
        max_steps = random.randint(30, 40)  # Tight budget
        target = random.uniform(65.0, 72.0)

        table_names = random.sample(list(self._TABLE_POOL), n_tables)
        tables = [
            {
                "name": name,
                "rows": random.randint(100000, 2000000),
                "indexes": ["PRIMARY"],
                "size_mb": random.randint(200, 5000),
            }
            for name in table_names
        ]
        row_counts = {table["name"]: table["rows"] for table in tables}

        slow_queries = []
        for i in range(n_queries):
            main_table = random.choice(table_names)
            slow_queries.append({
                "id": f"q{i+1}",
                "sql": f"SELECT * FROM {main_table} WHERE user_id=? AND status=? AND created_at > ?",
                "avg_ms": random.randint(8000, 30000),
                "main_table": main_table,
                # A single-table query cannot examine more rows than the
                # table holds, so bound the draw by that table's row count.
                "rows_examined": random.randint(100000, row_counts[main_table]),
            })

        return {
            "id": f"ultra_{random.randint(1000, 9999)}",
            "description": f"Ultra: {n_tables}-table DB, {n_queries} slow queries, {max_steps}-step budget.",
            "tables": tables,
            "slow_queries": slow_queries,
            "missing_index_hints": [],  # No hints for ultra!
            "performance_score_baseline": round(random.uniform(2.0, 8.0), 1),
            "target_score": round(target, 1),
            "max_steps": max_steps,
            "category": "ultra",
        }

    def get_stats(self) -> dict:
        """Returns curriculum stats for /progress endpoint.

        ``recent_avg`` covers the last ``ADVANCE_WINDOW`` scores (the same
        window that drives tier changes); ``all_time_avg`` covers every
        episode ever recorded. Both are 0.0 before the first episode.
        """
        recent = self.episode_scores[-self.ADVANCE_WINDOW:]
        return {
            "current_tier": self.tier_names[self.current_tier],
            "episodes_run": self.episodes_run,
            "recent_avg": round(sum(recent) / max(len(recent), 1), 3),
            "all_time_avg": round(self._score_total / max(self.episodes_run, 1), 3),
            "tier_history": self.tier_history[-5:],
            "advance_at": self.ADVANCE_THRESHOLD,
        }
| # Singleton: module-level CurriculumGenerator instance, created when this module is imported. | |
| curriculum = CurriculumGenerator() | |