Spaces:
Running
Running
"""
Tool output validation eval — pure-function tests, no LLM.

Drives app.ai.agent.tool_validator.validate_tool_output over the canonical
cases in evals/cases/tool_validation.yaml. Verifies that:

- well-formed outputs pass through unchanged
- malformed outputs are rewritten to (False, "...malformed...", None),
  i.e. (success, message, result_data)
- inputs that already failed (success=False) skip validation
- tools without a registered schema pass through
"""
import pytest

from app.ai.agent.tool_validator import validate_tool_output
from evals.harness import load_cases
# Canonical validator cases. Loaded at import time so pytest can parametrize
# over them during collection.
CASES = load_cases("tool_validation.yaml")


# NOTE(review): assumes load_cases returns an iterable of dict-like cases,
# each carrying an "id" key — confirm against evals.harness.
@pytest.mark.parametrize("case", CASES, ids=lambda c: str(c["id"]))
def test_validate_tool_output(case):
    """Run one canonical case through ``validate_tool_output`` and check it.

    Keys read from ``case``:
        id: label used in failure messages (and as the pytest test id).
        tool: tool name passed to the validator.
        success: success flag passed to the validator.
        message: optional input message; defaults to ``"ok"``.
        result_data: payload passed to the validator.
        expected_ok: required; truthy means the output must pass through
            unchanged, falsy means the validator must reject it.
        expected_success_after: optional; when present, only the returned
            success flag is compared against it and the ``expected_ok``
            checks are skipped.
    """
    case_id = case["id"]
    success_in = case["success"]
    message_in = case.get("message", "ok")
    result_in = case["result_data"]

    success_out, message_out, result_out = validate_tool_output(
        tool_name=case["tool"],
        success=success_in,
        message=message_in,
        result_data=result_in,
    )

    # Read unconditionally so a case missing this key fails loudly, even when
    # "expected_success_after" is what ends up deciding the outcome.
    expected_ok = case["expected_ok"]

    # The validator only flips success True→False; never False→True.
    if "expected_success_after" in case:
        assert success_out == case["expected_success_after"], (
            f"Case {case_id!r}: expected success={case['expected_success_after']} "
            f"but got {success_out}"
        )
    elif expected_ok:
        assert success_out is success_in, (
            f"Case {case_id!r}: validator flipped success unexpectedly "
            f"(in={success_in}, out={success_out}, msg={message_out!r})"
        )
        assert result_out == result_in, (
            f"Case {case_id!r}: result_data was modified for a passing case"
        )
    else:
        assert success_out is False, (
            f"Case {case_id!r}: expected validator to reject but got "
            f"success={success_out}, msg={message_out!r}"
        )
        # The isinstance guard turns a non-string message into a readable
        # assertion failure instead of an AttributeError from .lower().
        assert isinstance(message_out, str) and "malformed" in message_out.lower(), (
            f"Case {case_id!r}: rejection message should mention 'malformed', "
            f"got {message_out!r}"
        )
        assert result_out is None, (
            f"Case {case_id!r}: rejected case should null result_data, "
            f"got {result_out!r}"
        )