# ethicsguard/tests/test_grader.py
# GodreignElgin
# Clamp final task scores to open interval
# bbcb74d
from ethicsguard.generator import generate_queue
from ethicsguard.grader import grade_episode
from ethicsguard.models import EpisodeHistory, StepRecord
from ethicsguard.policy import DEFAULT_POLICY
def test_perfect_episode_scores_strictly_below_one() -> None:
    """Grade an episode in which every item receives its ground-truth action.

    The generated queue is walked in ``(priority_tier, id)`` order and each
    step is recorded as correct and actioned. The resulting grade is expected
    to be exactly ``0.9999`` rather than ``1.0``.
    """
    queue = generate_queue(seed=1000, difficulty="easy")
    by_priority = sorted(queue, key=lambda entry: (entry.priority_tier, entry.id))

    # One flawless step per item, numbered from 1 in processing order.
    perfect_steps = []
    for position, entry in enumerate(by_priority, start=1):
        perfect_steps.append(
            StepRecord(
                step=position,
                item_id=entry.id,
                action_type=entry.ground_truth_action,
                reward=0.3,
                was_correct=True,
                item_tier=entry.priority_tier,
                actioned=True,
            )
        )

    history = EpisodeHistory(
        total_items=len(by_priority),
        max_steps=15,
        records=perfect_steps,
    )

    # The grader is called with the original (unsorted) queue.
    score = grade_episode(history, queue, DEFAULT_POLICY)
    assert score == 0.9999
def test_empty_progress_scores_strictly_above_zero() -> None:
    """Grade an episode that recorded no steps at all.

    With an empty ``records`` list the grade is expected to be exactly
    ``0.0001`` rather than ``0.0``.
    """
    queue = generate_queue(seed=1000, difficulty="easy")
    no_progress = EpisodeHistory(
        total_items=len(queue),
        max_steps=15,
        records=[],
    )

    score = grade_episode(no_progress, queue, DEFAULT_POLICY)
    assert score == 0.0001
def test_skipped_items_keep_score_in_open_interval() -> None:
    """Grade an episode whose only step is an incorrect, penalised skip.

    The single record skips the first queue item with a reward of ``-1.0``.
    The grade must still lie strictly between ``0.0`` and ``1.0``.
    """
    queue = generate_queue(seed=1000, difficulty="easy")
    first_item = queue[0]

    skip_record = StepRecord(
        step=1,
        item_id=first_item.id,
        action_type="skip",
        reward=-1.0,
        was_correct=False,
        item_tier=first_item.priority_tier,
        actioned=False,
    )
    history = EpisodeHistory(
        total_items=len(queue),
        max_steps=15,
        records=[skip_record],
    )

    result = grade_episode(history, queue, DEFAULT_POLICY)
    # Both bounds are exclusive: neither 0.0 nor 1.0 is an acceptable score.
    assert 0.0 < result
    assert result < 1.0