"""Boundary tests for ``grade_episode``: scores must stay inside (0, 1)."""

import math

from ethicsguard.generator import generate_queue
from ethicsguard.grader import grade_episode
from ethicsguard.models import EpisodeHistory, StepRecord
from ethicsguard.policy import DEFAULT_POLICY

# Absolute tolerance for comparing grader output against the expected bounds.
# Tight enough to pin the value to four decimals, loose enough to survive
# float rounding inside the grader.
_SCORE_TOLERANCE = 1e-9


def test_perfect_episode_scores_strictly_below_one() -> None:
    """A fully correct episode scores 0.9999, never exactly 1.0.

    Every queue item is actioned with its ground-truth action, in
    ``(priority_tier, id)`` order, and each step is marked correct.
    """
    queue = generate_queue(seed=1000, difficulty="easy")
    ordered_queue = sorted(queue, key=lambda item: (item.priority_tier, item.id))
    history = EpisodeHistory(
        total_items=len(ordered_queue),
        max_steps=15,
        records=[
            StepRecord(
                step=index,
                item_id=item.id,
                action_type=item.ground_truth_action,
                reward=0.3,
                was_correct=True,
                item_tier=item.priority_tier,
                actioned=True,
            )
            for index, item in enumerate(ordered_queue, start=1)
        ],
    )

    score = grade_episode(history, queue, DEFAULT_POLICY)

    # NOTE(review): 0.9999 is presumably the grader's upper clamp -- confirm
    # against grade_episode.
    assert math.isclose(score, 0.9999, abs_tol=_SCORE_TOLERANCE)
    assert 0.0 < score < 1.0


def test_empty_progress_scores_strictly_above_zero() -> None:
    """An episode with no recorded steps scores 0.0001, never exactly 0.0."""
    queue = generate_queue(seed=1000, difficulty="easy")
    history = EpisodeHistory(total_items=len(queue), max_steps=15, records=[])

    score = grade_episode(history, queue, DEFAULT_POLICY)

    # NOTE(review): 0.0001 is presumably the grader's lower clamp -- confirm
    # against grade_episode.
    assert math.isclose(score, 0.0001, abs_tol=_SCORE_TOLERANCE)
    assert 0.0 < score < 1.0


def test_skipped_items_keep_score_in_open_interval() -> None:
    """A single incorrect, penalised ``skip`` still scores inside (0, 1)."""
    queue = generate_queue(seed=1000, difficulty="easy")
    first_item = queue[0]
    history = EpisodeHistory(
        total_items=len(queue),
        max_steps=15,
        records=[
            StepRecord(
                step=1,
                item_id=first_item.id,
                action_type="skip",
                reward=-1.0,
                was_correct=False,
                item_tier=first_item.priority_tier,
                actioned=False,
            )
        ],
    )

    score = grade_episode(history, queue, DEFAULT_POLICY)

    assert 0.0 < score < 1.0