Spaces:
Sleeping
Sleeping
| from ethicsguard.generator import generate_queue | |
| from ethicsguard.grader import grade_episode | |
| from ethicsguard.models import EpisodeHistory, StepRecord | |
| from ethicsguard.policy import DEFAULT_POLICY | |
def test_perfect_episode_scores_strictly_below_one() -> None:
    """Check that a fully correct episode is graded 0.9999, not 1.0.

    Every item of the seeded "easy" queue is recorded as actioned with its
    own ``ground_truth_action``, visiting items in ascending
    ``(priority_tier, id)`` order, one step per item starting at step 1.
    """
    queue = generate_queue(seed=1000, difficulty="easy")

    def visit_order(entry):
        # Lowest priority tier first; ties broken by item id.
        return (entry.priority_tier, entry.id)

    by_priority = sorted(queue, key=visit_order)

    perfect_steps = []
    for step_number, entry in enumerate(by_priority, start=1):
        perfect_steps.append(
            StepRecord(
                step=step_number,
                item_id=entry.id,
                action_type=entry.ground_truth_action,
                reward=0.3,
                was_correct=True,
                item_tier=entry.priority_tier,
                actioned=True,
            )
        )

    history = EpisodeHistory(
        total_items=len(by_priority),
        max_steps=15,
        records=perfect_steps,
    )

    # NOTE(review): exact float comparison; presumably the grader clamps
    # its result to this constant. Confirm against grade_episode.
    score = grade_episode(history, queue, DEFAULT_POLICY)
    assert score == 0.9999
def test_empty_progress_scores_strictly_above_zero() -> None:
    """Check that an episode with no step records is graded 0.0001, not 0.0."""
    queue = generate_queue(seed=1000, difficulty="easy")
    no_steps = []
    history = EpisodeHistory(
        total_items=len(queue),
        max_steps=15,
        records=no_steps,
    )

    # NOTE(review): exact float comparison; presumably the grader clamps
    # its result to this constant. Confirm against grade_episode.
    score = grade_episode(history, queue, DEFAULT_POLICY)
    assert score == 0.0001
def test_skipped_items_keep_score_in_open_interval() -> None:
    """Check that a single skipped, penalised item still grades inside (0, 1).

    The history holds one record: the first queue item, skipped at step 1
    with a reward of -1.0 and marked as neither correct nor actioned.
    """
    queue = generate_queue(seed=1000, difficulty="easy")
    first_item = queue[0]

    skipped_step = StepRecord(
        step=1,
        item_id=first_item.id,
        action_type="skip",
        reward=-1.0,
        was_correct=False,
        item_tier=first_item.priority_tier,
        actioned=False,
    )
    history = EpisodeHistory(
        total_items=len(queue),
        max_steps=15,
        records=[skipped_step],
    )

    score = grade_episode(history, queue, DEFAULT_POLICY)
    # Both bounds are exclusive: the grade must never reach 0.0 or 1.0.
    assert score > 0.0 and score < 1.0