EduForge-Tutor / test_engine.py
hari15prasad
Initial clean deployment to Hugging Face
6f44ddb
import unittest
from src.rewards.engine import RewardEngine, SCAFFOLD_ACTIONS, DIRECT_TELL_ACTIONS, MIN_DIAGNOSTIC_DEPTH, EARLY_SESSION_THRESHOLD
from src.environment.student_fsm import TutorAction, MisconceptionType
class TestRewardEngine(unittest.TestCase):
    """Unit tests for the reward components returned by ``RewardEngine.compute``.

    Reward values are floats, so every numeric expectation is checked with
    ``assertAlmostEqual`` rather than exact equality.
    """

    def setUp(self):
        """Create a fresh ``RewardEngine`` before each test method."""
        self.engine = RewardEngine()

    def _components(self, *, action, episode_length, confusion_before=8.0,
                    confusion_after=6.0, done=False, done_reason=None,
                    **extra):
        """Run one ``compute`` step on ``self.engine`` and return its components.

        Every scenario in this class uses ``attention_after=6.0``,
        ``action_text="test"`` and ``format_valid=True``; only the arguments
        exposed here vary. Additional keyword arguments (for example
        ``misconception``) are forwarded to ``compute`` unchanged.

        Returns:
            The second element of the ``(total, components)`` pair returned
            by ``compute``; the total is not inspected by these tests.
        """
        _, components = self.engine.compute(
            confusion_before=confusion_before,
            confusion_after=confusion_after,
            attention_after=6.0,
            action=action,
            episode_length=episode_length,
            action_text="test",
            format_valid=True,
            done=done,
            done_reason=done_reason,
            **extra
        )
        return components

    def test_info_dump_penalty_early_session(self):
        """A direct tell (worked example) on step 1 gets a heavy penalty."""
        components = self._components(
            action=TutorAction.WORKED_EXAMPLE, episode_length=1,
        )
        # Expected penalty = 2.0 * max(0, 4 - 1 + 1) = 2.0 * 4 = 8.0
        self.assertAlmostEqual(components.breakdown["p_info_dump"], 8.0)

    def test_info_dump_no_penalty_late_session(self):
        """A direct tell on step 6 (past the early-session window) is not penalised."""
        components = self._components(
            action=TutorAction.WORKED_EXAMPLE, episode_length=6,
        )
        self.assertAlmostEqual(components.breakdown["p_info_dump"], 0.0)

    def test_scaffold_confusion_drop_higher_reward(self):
        """The same confusion drop earns more process reward via a scaffold action."""
        # Both steps run on the same engine instance, scaffold step first.
        # Confusion drops by 2.0 (8.0 -> 6.0) using a scaffold action.
        comp_scaffold = self._components(
            action=TutorAction.QUESTION, episode_length=3,
        )
        # Confusion drops by 2.0 (8.0 -> 6.0) using a direct tell.
        comp_direct = self._components(
            action=TutorAction.WORKED_EXAMPLE, episode_length=3,
        )

        # Expected process rewards:
        #   scaffold: 2.0 * 1.4 * 1.0 * 2.0 = 5.6
        #   direct:   2.0 * 0.6 * 1.0 * 2.0 = 2.4
        self.assertGreater(comp_scaffold.r_process, comp_direct.r_process)
        self.assertAlmostEqual(comp_scaffold.r_process, 5.6)
        self.assertAlmostEqual(comp_direct.r_process, 2.4)

    def test_alignment_priority_tiered(self):
        """Alignment reward follows the priority order for PROCEDURAL misconceptions."""
        # For PROCEDURAL: hint > question > worked_example.
        # hint is 0th (2.5), question is 1st (1.5), worked_example is 2nd (0.5).
        comp_hint = self._components(
            action=TutorAction.HINT, episode_length=3,
            confusion_before=8.0, confusion_after=8.0,
            misconception=MisconceptionType.PROCEDURAL,
        )
        self.assertAlmostEqual(comp_hint.r_alignment, 2.5)

        comp_wex = self._components(
            action=TutorAction.WORKED_EXAMPLE, episode_length=3,
            confusion_before=8.0, confusion_after=8.0,
            misconception=MisconceptionType.PROCEDURAL,
        )
        self.assertAlmostEqual(comp_wex.r_alignment, 0.5)

    def test_scaffolded_resolution_bonus_requires_depth(self):
        """The scaffolded-resolution bonus is only granted for deep enough episodes."""
        # Resolving in 3 steps: no depth bonus, even though the streak is scaffolded.
        # The streak is seeded directly on the engine's private counter.
        self.engine._scaffold_streak = 3
        comp_early = self._components(
            action=TutorAction.QUESTION, episode_length=3,
            confusion_before=2.0, confusion_after=1.0,
            done=True, done_reason="success",
        )
        # Early completion -> no scaffolded resolution bonus.
        self.assertAlmostEqual(
            comp_early.breakdown["r_scaffolded_resolution"], 0.0
        )

        # Resolving in 6 steps (>= MIN_DIAGNOSTIC_DEPTH): the bonus is applied.
        # Re-seed the streak, since the previous step ran on the same engine.
        self.engine._scaffold_streak = 3
        comp_valid = self._components(
            action=TutorAction.QUESTION, episode_length=6,
            confusion_before=2.0, confusion_after=1.0,
            done=True, done_reason="success",
        )
        self.assertAlmostEqual(
            comp_valid.breakdown["r_scaffolded_resolution"], 8.0
        )
# Script entry point: run this module's tests through unittest's
# command-line runner when the file is executed directly.
if __name__ == '__main__':
    unittest.main()