Spaces:
Paused
Paused
"""
AI Tutor RL Environment — models a student learning across N subjects.

State:  proficiency scores in [0, 1] for each subject
Action: which subject to focus on (0..N-1)
Reward: proficiency of the focused subject after the step
        — encourages the agent to focus on subjects where it can make progress

Dynamics:
  - Focused subject gains +12–28 percentage points (simulates a focused study session)
  - All other subjects decay by 0.5–2.5 percentage points (simulates forgetting / drift)
  - Episode ends when all subjects reach 98%+ mastery, or is truncated at the
    200-step cap
"""
| from __future__ import annotations | |
| import numpy as np | |
| import gymnasium as gym | |
| from gymnasium import spaces | |
# Subject catalogue. The three lists below are parallel: entry i of each one
# describes the same subject, so keep them the same length and order.
SUBJECTS = ["Mathematics", "Physics", "Literature", "History", "Computer Science"]

# One hex colour per subject (presumably consumed by the UI layer — confirm).
SUBJECT_COLORS = [
    "#6366f1",
    "#10b981",
    "#f59e0b",
    "#ec4899",
    "#3b82f6",
]

# One short glyph per subject.
# NOTE(review): the first four entries look mis-encoded (probably emoji or
# symbols lost in a bad transcoding) — restore them from the original source.
# They are kept verbatim here so runtime behaviour does not change.
SUBJECT_ICONS = [
    "β",
    "β",
    "π",
    "π",
    "</>",
]

# Default number of subjects, derived from the catalogue above.
N_SUBJECTS = len(SUBJECTS)
class AITutorEnv(gym.Env):
    """Gymnasium-compatible tutoring environment.

    Observation space: ``Box([0, 1]^N)``, float32 — normalised proficiency
    per subject.
    Action space: ``Discrete(N)`` — index of the subject to study this step.

    Dynamics of one step:
      * the focused subject gains a uniform random amount drawn from
        ``GAIN_RANGE`` (capped at 1.0);
      * every other subject loses an independent uniform random amount drawn
        from ``DECAY_RANGE`` (floored at 0.0);
      * the episode terminates once every subject is at or above
        ``MASTERY_THRESHOLD`` and is truncated after ``max_steps`` steps.

    All randomness comes from ``self.np_random`` so that ``reset(seed=...)``
    makes episodes reproducible.
    """

    metadata = {"render_modes": []}

    # Tunable dynamics. Values are absolute proficiency deltas on the 0..1 scale.
    INITIAL_RANGE = (0.05, 0.35)  # starting proficiency after reset()
    GAIN_RANGE = (0.12, 0.28)  # learning gain of the focused subject
    DECAY_RANGE = (0.005, 0.025)  # forgetting applied to every other subject
    MASTERY_THRESHOLD = 0.98  # per-subject level that counts as mastered

    def __init__(self, n_subjects: int = N_SUBJECTS, max_steps: int = 200):
        """Create the environment.

        Args:
            n_subjects: number of subjects, i.e. the size of both the
                observation vector and the discrete action set.
            max_steps: hard cap on episode length; reaching it sets the
                ``truncated`` flag returned by ``step``.
        """
        super().__init__()
        self.n = n_subjects
        self.observation_space = spaces.Box(
            low=0.0, high=1.0, shape=(n_subjects,), dtype=np.float32
        )
        self.action_space = spaces.Discrete(n_subjects)
        self.state = np.zeros(n_subjects, dtype=np.float32)
        self.step_count = 0
        self.max_steps = max_steps  # hard cap prevents infinite episodes

    def reset(self, seed: int | None = None, options: dict | None = None):
        """Start a new episode with low, varied proficiency.

        Args:
            seed: optional seed; forwarded to ``gym.Env.reset`` which
                (re)seeds ``self.np_random``.
            options: accepted for API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``observation`` is a float32 copy
            of the state and ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        low, high = self.INITIAL_RANGE
        # Draw from the env's own generator (not the global np.random) so the
        # seed passed above actually controls the initial state.
        self.state = self.np_random.uniform(low, high, self.n).astype(np.float32)
        self.step_count = 0
        return self.state.copy(), {}

    def step(self, action: int):
        """Study one subject for one step.

        Args:
            action: index of the subject to focus on, in ``[0, n)``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. The
            reward is the focused subject's proficiency after the step;
            ``terminated`` is True when every subject is mastered;
            ``truncated`` is True once ``max_steps`` steps have elapsed.

        Raises:
            IndexError: if ``action`` is outside ``[0, n)``. Negative values
                are rejected explicitly because NumPy indexing would
                otherwise wrap them around to a different subject.
        """
        if not 0 <= action < self.n:
            raise IndexError(
                f"action {action!r} is out of range for {self.n} subjects"
            )
        self.step_count += 1

        # Learning gain for the focused subject.
        gain = float(self.np_random.uniform(*self.GAIN_RANGE))
        self.state[action] = min(1.0, float(self.state[action]) + gain)

        # Forgetting for all other subjects.
        decay = self.np_random.uniform(*self.DECAY_RANGE, size=self.n)
        decay[action] = 0.0
        # The subtraction promotes to float64; cast back so observations keep
        # matching the float32 observation space.
        self.state = np.maximum(0.0, self.state - decay).astype(np.float32)

        reward = float(self.state[action])  # reward = current mastery
        mastered = bool(np.all(self.state >= self.MASTERY_THRESHOLD))
        truncated = self.step_count >= self.max_steps
        info = {"mastered": mastered, "step": self.step_count}
        return self.state.copy(), reward, mastered, truncated, info

    def set_state(self, proficiency_pct: list[float]):
        """Inject a specific state (from UI sliders, in 0–100 range).

        Values are scaled to 0..1 and clipped to that interval; the step
        counter is reset.

        Args:
            proficiency_pct: one percentage per subject.

        Raises:
            ValueError: if the number of values differs from the number of
                subjects.
        """
        scaled = np.asarray(proficiency_pct, dtype=np.float32) / 100.0
        if scaled.shape != (self.n,):
            raise ValueError(
                f"expected {self.n} proficiency values, got shape {scaled.shape}"
            )
        self.state = np.clip(scaled, 0.0, 1.0).astype(np.float32)
        self.step_count = 0