Spaces:

Dash10107
/

AI-Tutor-A2C

Paused

Daksh C Jain

Complete revamp — professional A2C tutoring platform

5251c5a 25 days ago

2.94 kB

	"""
	AI Tutor RL Environment — models a student learning across N subjects.

	State: proficiency scores in [0, 1] for each subject
	Action: which subject to focus on (0..N-1)
	Reward: proficiency of the focused subject after the step
	→ encourages the agent to focus on subjects where it can make progress

	Dynamics:
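	- Focused subject gains +0.12–0.28 proficiency, i.e. 12–28 points on the 0–100 scale, capped at 1.0 (simulates a focused study session)
	- All other subjects decay by 0.005–0.025, i.e. 0.5–2.5 points, floored at 0.0 (simulates forgetting / drift)
	- Episode ends when all subjects reach 98%+ mastery, or is truncated after 200 steps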
	"""

	from __future__ import annotations
	import numpy as np
	import gymnasium as gym
	from gymnasium import spaces

	# One row per subject: (display name, hex colour string, icon glyph).
	# Keeping the three attributes side by side guarantees the derived lists
	# below always have the same length and ordering.
	_SUBJECT_TABLE = [
	    ("Mathematics", "#6366f1", "∑"),
	    ("Physics", "#10b981", "⚛"),
	    ("Literature", "#f59e0b", "📖"),
	    ("History", "#ec4899", "🏛"),
	    ("Computer Science", "#3b82f6", "</>"),
	]

	# Parallel lists: index i in each list refers to the same subject.
	SUBJECTS = [name for name, _, _ in _SUBJECT_TABLE]
	SUBJECT_COLORS = [colour for _, colour, _ in _SUBJECT_TABLE]
	SUBJECT_ICONS = [icon for _, _, icon in _SUBJECT_TABLE]

	# Default number of subjects; used as the default size of the environment.
	N_SUBJECTS = len(SUBJECTS)


	class AITutorEnv(gym.Env):
	    """
	    Gymnasium-compatible tutoring environment.

	    Observation space: Box([0,1]^N) — normalised proficiency per subject
	    Action space: Discrete(N) — which subject to study

	    All randomness is drawn from ``self.np_random`` (the generator that
	    ``gym.Env.reset`` seeds), so ``reset(seed=...)`` makes an episode
	    reproducible. The state is kept as float32 to match the observation
	    space.
	    """

	    metadata = {"render_modes": []}

	    def __init__(self, n_subjects: int = N_SUBJECTS):
	        """Create an environment tracking ``n_subjects`` proficiency scores."""
	        super().__init__()
	        self.n = n_subjects
	        self.observation_space = spaces.Box(
	            low=0.0, high=1.0, shape=(n_subjects,), dtype=np.float32
	        )
	        self.action_space = spaces.Discrete(n_subjects)
	        # Placeholder until reset() / set_state() provides a real state.
	        self.state = np.zeros(n_subjects, dtype=np.float32)
	        self.step_count = 0
	        self.max_steps = 200  # hard cap prevents infinite episodes

	    def reset(self, seed: int | None = None, options: dict | None = None):
	        """
	        Start a new episode.

	        Args:
	            seed: optional seed forwarded to ``gym.Env.reset``, which
	                (re)seeds ``self.np_random``.
	            options: accepted for API compatibility; not used.

	        Returns:
	            ``(observation, info)`` — a copy of the initial float32 state
	            and an empty info dict.
	        """
	        super().reset(seed=seed)
	        # Start students with low, varied proficiency. Use the env's own
	        # generator (not the global np.random) so the seed takes effect.
	        self.state = self.np_random.uniform(0.05, 0.35, self.n).astype(np.float32)
	        self.step_count = 0
	        return self.state.copy(), {}

	    def step(self, action: int):
	        """
	        Study one subject for one step.

	        The chosen subject gains a random amount in [0.12, 0.28) (capped at
	        1.0); every other subject decays by a random amount in
	        [0.005, 0.025) (floored at 0.0).

	        Args:
	            action: index of the subject to focus on.

	        Returns:
	            ``(observation, reward, terminated, truncated, info)`` where the
	            reward is the focused subject's proficiency after the step,
	            ``terminated`` is True once every subject is >= 0.98, and
	            ``truncated`` is True once ``max_steps`` steps have been taken.
	        """
	        self.step_count += 1
	        rng = self.np_random

	        # Learning gain for focused subject
	        gain = float(rng.uniform(0.12, 0.28))
	        self.state[action] = min(1.0, self.state[action] + gain)

	        # Forgetting for all other subjects. Cast to float32 so the
	        # subtraction does not silently promote the state to float64.
	        decay = rng.uniform(0.005, 0.025, self.n).astype(np.float32)
	        decay[action] = 0.0
	        self.state = np.maximum(self.state - decay, 0.0).astype(
	            np.float32, copy=False
	        )

	        reward = float(self.state[action])  # reward = current mastery
	        mastered = bool(np.all(self.state >= 0.98))
	        truncated = self.step_count >= self.max_steps
	        terminated = mastered

	        info = {"mastered": mastered, "step": self.step_count}
	        return self.state.copy(), reward, terminated, truncated, info

	    def set_state(self, proficiency_pct: list[float]):
	        """
	        Inject a specific state (from UI sliders, in 0–100 range).

	        Values are divided by 100 and clipped to [0, 1]; the step counter is
	        reset to 0.

	        Raises:
	            ValueError: if ``proficiency_pct`` does not contain exactly one
	                value per subject.
	        """
	        values = np.asarray(proficiency_pct, dtype=np.float32)
	        if values.shape != (self.n,):
	            raise ValueError(
	                f"expected {self.n} proficiency values, got shape {values.shape}"
	            )
	        self.state = np.clip(values / 100.0, 0.0, 1.0)
	        self.step_count = 0