AIDA / evals /test_tool_validation.py
destinyebuka's picture
new setup
7cd10a9
"""
Tool output validation eval — pure-function tests, no LLM.
Drives app.ai.agent.tool_validator.validate_tool_output over canonical
cases in evals/cases/tool_validation.yaml. Verifies that:
- well-formed outputs pass through unchanged
- malformed outputs are rewritten to (False, "...malformed...", None)
- failures (success=False) skip validation
- tools without a registered schema pass through
"""
import copy

import pytest

from app.ai.agent.tool_validator import validate_tool_output
from evals.harness import load_cases
# Canonical cases, loaded once at import time so they are already available
# to the parametrize decorator below when pytest collects this module.
CASES = load_cases("tool_validation.yaml")
@pytest.mark.parametrize("case", CASES, ids=[c["id"] for c in CASES])
def test_validate_tool_output(case):
    """Run one canonical case through ``validate_tool_output`` and check it.

    Keys read from ``case``:
        id: label used as the pytest id and in failure messages.
        tool: forwarded as ``tool_name``.
        success: forwarded as ``success``.
        message: forwarded as ``message``; defaults to ``"ok"`` when absent.
        result_data: forwarded as ``result_data``.
        expected_ok: truthy when the output should pass through untouched,
            falsy when the validator should reject it.
        expected_success_after: optional; when present, only the returned
            success flag is compared against it and the other checks are
            skipped.
    """
    case_id = case["id"]
    success_in = case["success"]
    message_in = case.get("message", "ok")
    result_in = case["result_data"]
    # Snapshot before the call: if the validator hands back the very object
    # it was given, comparing that object with itself could never reveal an
    # in-place mutation.
    result_before = copy.deepcopy(result_in)

    success_out, message_out, result_out = validate_tool_output(
        tool_name=case["tool"],
        success=success_in,
        message=message_in,
        result_data=result_in,
    )
    expected_ok = case["expected_ok"]

    # The validator only flips success True→False; never False→True.
    if "expected_success_after" in case:
        assert success_out == case["expected_success_after"], (
            f"Case {case_id!r}: expected success={case['expected_success_after']} "
            f"but got {success_out}"
        )
    elif expected_ok:
        assert success_out is success_in, (
            f"Case {case_id!r}: validator flipped success unexpectedly "
            f"(in={success_in}, out={success_out}, msg={message_out!r})"
        )
        assert result_out == result_before, (
            f"Case {case_id!r}: result_data was modified for a passing case"
        )
    else:
        assert success_out is False, (
            f"Case {case_id!r}: expected validator to reject but got "
            f"success={success_out}, msg={message_out!r}"
        )
        # Guard the type first so a None / non-string message fails with the
        # descriptive assertion text rather than an AttributeError.
        assert isinstance(message_out, str) and "malformed" in message_out.lower(), (
            f"Case {case_id!r}: rejection message should mention 'malformed', "
            f"got {message_out!r}"
        )
        assert result_out is None, (
            f"Case {case_id!r}: rejected case should null result_data, "
            f"got {result_out!r}"
        )