Spaces:

Hari15prasad
/

EduForge-Tutor

Sleeping

hari15prasad

Initial clean deployment to Hugging Face

6f44ddb 25 days ago

4.54 kB

	import unittest
	from src.rewards.engine import RewardEngine, SCAFFOLD_ACTIONS, DIRECT_TELL_ACTIONS, MIN_DIAGNOSTIC_DEPTH, EARLY_SESSION_THRESHOLD
	from src.environment.student_fsm import TutorAction, MisconceptionType

	class TestRewardEngine(unittest.TestCase):
	    """Unit tests for the reward components produced by ``RewardEngine.compute``.

	    Every test calls ``compute`` with keyword arguments only, unpacks the
	    result as a ``(total, components)`` pair, and checks a single field of
	    ``components`` (``breakdown[...]``, ``r_process`` or ``r_alignment``).
	    Numeric expectations are compared with ``assertAlmostEqual`` because
	    the values are floats.
	    """

	    def setUp(self):
	        """Build a new ``RewardEngine`` before each test method runs."""
	        self.engine = RewardEngine()

	    def _compute(self, **overrides):
	        """Call ``self.engine.compute`` with the arguments shared by all tests.

	        Args:
	            **overrides: Keyword arguments that replace or extend the shared
	                defaults below. Everything is forwarded by keyword.

	        Returns:
	            Whatever ``compute`` returns; the tests unpack it as
	            ``(total, components)``.
	        """
	        kwargs = {
	            "attention_after": 6.0,
	            "action_text": "test",
	            "format_valid": True,
	            "done": False,
	            "done_reason": None,
	        }
	        kwargs.update(overrides)
	        return self.engine.compute(**kwargs)

	    def test_info_dump_penalty_early_session(self):
	        """A direct tell (worked example) on step 1 gets a heavy penalty."""
	        _, components = self._compute(
	            confusion_before=8.0,
	            confusion_after=6.0,
	            action=TutorAction.WORKED_EXAMPLE,
	            episode_length=1,
	        )
	        # Test author's derivation:
	        # penalty = 2.0 * max(0, 4 - 1 + 1) = 2.0 * 4 = 8.0
	        self.assertAlmostEqual(components.breakdown["p_info_dump"], 8.0)

	    def test_info_dump_no_penalty_late_session(self):
	        """A direct tell on step 6 (past the early-session window) is not penalised."""
	        _, components = self._compute(
	            confusion_before=8.0,
	            confusion_after=6.0,
	            action=TutorAction.WORKED_EXAMPLE,
	            episode_length=6,
	        )
	        self.assertAlmostEqual(components.breakdown["p_info_dump"], 0.0)

	    def test_scaffold_confusion_drop_higher_reward(self):
	        """The same confusion drop earns more process reward via a scaffold action."""
	        # Both calls drop confusion by 2.0 at step 3; only the action differs.
	        _, comp_scaffold = self._compute(
	            confusion_before=8.0,
	            confusion_after=6.0,
	            action=TutorAction.QUESTION,
	            episode_length=3,
	        )
	        _, comp_direct = self._compute(
	            confusion_before=8.0,
	            confusion_after=6.0,
	            action=TutorAction.WORKED_EXAMPLE,
	            episode_length=3,
	        )
	        # Test author's derivation:
	        #   scaffold: 2.0 * 1.4 * 1.0 * 2.0 = 5.6
	        #   direct:   2.0 * 0.6 * 1.0 * 2.0 = 2.4
	        self.assertGreater(comp_scaffold.r_process, comp_direct.r_process)
	        self.assertAlmostEqual(comp_scaffold.r_process, 5.6)
	        self.assertAlmostEqual(comp_direct.r_process, 2.4)

	    def test_alignment_priority_tiered(self):
	        """Alignment reward for a PROCEDURAL misconception depends on the action.

	        The test author's note gives the ordering hint > question >
	        worked_example with values 2.5, 1.5 and 0.5; only the first and
	        last tiers are asserted here.
	        """
	        _, comp_hint = self._compute(
	            confusion_before=8.0,
	            confusion_after=8.0,
	            action=TutorAction.HINT,
	            episode_length=3,
	            misconception=MisconceptionType.PROCEDURAL,
	        )
	        self.assertAlmostEqual(comp_hint.r_alignment, 2.5)

	        _, comp_wex = self._compute(
	            confusion_before=8.0,
	            confusion_after=8.0,
	            action=TutorAction.WORKED_EXAMPLE,
	            episode_length=3,
	            misconception=MisconceptionType.PROCEDURAL,
	        )
	        self.assertAlmostEqual(comp_wex.r_alignment, 0.5)

	    def test_scaffolded_resolution_bonus_requires_depth(self):
	        """The scaffolded-resolution bonus is only paid for long enough episodes."""
	        # Resolving successfully in 3 steps: no bonus, even with a scaffold
	        # streak. The streak is set directly on the engine's private
	        # attribute, as the original test did.
	        self.engine._scaffold_streak = 3
	        _, comp_early = self._compute(
	            confusion_before=2.0,
	            confusion_after=1.0,
	            action=TutorAction.QUESTION,
	            episode_length=3,
	            done=True,
	            done_reason="success",
	        )
	        self.assertAlmostEqual(
	            comp_early.breakdown["r_scaffolded_resolution"], 0.0
	        )

	        # Resolving in 6 steps (>= MIN_DIAGNOSTIC_DEPTH per the test author's
	        # note): the bonus is applied. The streak is set again because the
	        # previous compute() call ran on the same engine instance.
	        self.engine._scaffold_streak = 3
	        _, comp_valid = self._compute(
	            confusion_before=2.0,
	            confusion_after=1.0,
	            action=TutorAction.QUESTION,
	            episode_length=6,
	            done=True,
	            done_reason="success",
	        )
	        self.assertAlmostEqual(
	            comp_valid.breakdown["r_scaffolded_resolution"], 8.0
	        )

	# Run the tests in this module when the file is executed directly.
	if __name__ == '__main__':
	    unittest.main()