""" Tool output validation eval — pure-function tests, no LLM. Drives app.ai.agent.tool_validator.validate_tool_output over canonical cases in evals/cases/tool_validation.yaml. Verifies that: - well-formed outputs pass through unchanged - malformed outputs are rewritten to (False, "...malformed...", None) - failures (success=False) skip validation - tools without a registered schema pass through """ import pytest from app.ai.agent.tool_validator import validate_tool_output from evals.harness import load_cases CASES = load_cases("tool_validation.yaml") @pytest.mark.parametrize("case", CASES, ids=[c["id"] for c in CASES]) def test_validate_tool_output(case): success_in = case["success"] message_in = case.get("message", "ok") result_in = case["result_data"] success_out, message_out, result_out = validate_tool_output( tool_name=case["tool"], success=success_in, message=message_in, result_data=result_in, ) expected_ok = case["expected_ok"] # The validator only flips success True→False; never False→True. if "expected_success_after" in case: assert success_out == case["expected_success_after"], ( f"Case {case['id']!r}: expected success={case['expected_success_after']} " f"but got {success_out}" ) elif expected_ok: assert success_out is success_in, ( f"Case {case['id']!r}: validator flipped success unexpectedly " f"(in={success_in}, out={success_out}, msg={message_out!r})" ) assert result_out == result_in, ( f"Case {case['id']!r}: result_data was modified for a passing case" ) else: assert success_out is False, ( f"Case {case['id']!r}: expected validator to reject but got " f"success={success_out}, msg={message_out!r}" ) assert "malformed" in message_out.lower(), ( f"Case {case['id']!r}: rejection message should mention 'malformed', " f"got {message_out!r}" ) assert result_out is None, ( f"Case {case['id']!r}: rejected case should null result_data, " f"got {result_out!r}" )