harmonic-analysis / tests /test_eval.py
ohollo's picture
Agent evaluation
c5625fd
Raw
History Blame Contribute Delete
825 Bytes
import pytest
from scenarios import SCENARIOS, Assertion
def _select_cases(want_judge):
    """Return the (scenario, assertion) pairs whose ``is_judge`` truthiness equals *want_judge*."""
    return [
        (scenario, assertion)
        for scenario in SCENARIOS
        for assertion in scenario.assertions
        if bool(assertion.is_judge) == want_judge
    ]


def _make_ids(cases):
    """Build one ``<scenario name>::<assertion description>`` id string per case."""
    return [
        f"{scenario.name}::{assertion.description}"
        for scenario, assertion in cases
    ]


# Assertions not flagged ``is_judge`` parametrize test_deterministic;
# the flagged ones parametrize test_judge. The id lists are index-aligned
# with their case lists.
_det_cases = _select_cases(False)
_judge_cases = _select_cases(True)
_det_ids = _make_ids(_det_cases)
_judge_ids = _make_ids(_judge_cases)
@pytest.mark.parametrize(
    argnames="scenario,assertion",
    argvalues=_det_cases,
    ids=_det_ids,
)
def test_deterministic(run_scenario, scenario, assertion):
    """Check one non-judge assertion against the outcome of its scenario.

    The scenario is executed through the ``run_scenario`` fixture and the
    outcome is handed to ``assertion.check``. If the check is falsy, the
    assertion's description is used as the failure message.
    """
    outcome = run_scenario(scenario)
    assert assertion.check(outcome), assertion.description
@pytest.mark.judge
@pytest.mark.parametrize(
    argnames="scenario,assertion",
    argvalues=_judge_cases,
    ids=_judge_ids,
)
def test_judge(run_scenario, scenario, assertion):
    """Check one ``is_judge`` assertion against the outcome of its scenario.

    Same flow as the deterministic test: run the scenario through the
    ``run_scenario`` fixture, pass the outcome to ``assertion.check``, and
    fail with the assertion's description when the check is falsy.

    NOTE(review): the ``judge`` marker presumably lets these cases be
    selected or skipped with ``-m`` — confirm it is registered in the
    project's pytest configuration.
    """
    outcome = run_scenario(scenario)
    assert assertion.check(outcome), assertion.description