File size: 2,222 Bytes
7cd10a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""
Tool output validation eval — pure-function tests, no LLM.

Drives app.ai.agent.tool_validator.validate_tool_output over canonical
cases in evals/cases/tool_validation.yaml. Verifies that:
  - well-formed outputs pass through unchanged
  - malformed outputs are rewritten to (False, "...malformed...", None)
  - failures (success=False) skip validation
  - tools without a registered schema pass through
"""

import copy

import pytest

from app.ai.agent.tool_validator import validate_tool_output
from evals.harness import load_cases


# Loaded once at import time so the parametrize decorator below can expand
# the cases (and read each case's "id") during test collection.
CASES = load_cases("tool_validation.yaml")


@pytest.mark.parametrize("case", CASES, ids=[c["id"] for c in CASES])
def test_validate_tool_output(case):
    """Run one canonical case through ``validate_tool_output``.

    Keys read from the ``case`` mapping:
      - ``tool``, ``success``, ``result_data``: required inputs forwarded to
        the validator.
      - ``message``: optional input message; defaults to ``"ok"``.
      - ``expected_ok``: required. Truthy means the output must pass through
        with success and result_data unchanged; falsy means it must be
        rejected (success False, "malformed" in the message, result None).
      - ``expected_success_after``: optional. When present, only the returned
        success flag is compared against it and the checks above are skipped.
    """
    success_in = case["success"]
    message_in = case.get("message", "ok")
    result_in = case["result_data"]
    # Snapshot the payload before the call. If the validator mutated
    # result_data in place and returned the same object, comparing the output
    # against result_in itself would always succeed and hide the change.
    result_snapshot = copy.deepcopy(result_in)

    success_out, message_out, result_out = validate_tool_output(
        tool_name=case["tool"],
        success=success_in,
        message=message_in,
        result_data=result_in,
    )

    expected_ok = case["expected_ok"]
    # The validator only flips success True→False; never False→True.
    if "expected_success_after" in case:
        assert success_out == case["expected_success_after"], (
            f"Case {case['id']!r}: expected success={case['expected_success_after']} "
            f"but got {success_out}"
        )
    elif expected_ok:
        assert success_out is success_in, (
            f"Case {case['id']!r}: validator flipped success unexpectedly "
            f"(in={success_in}, out={success_out}, msg={message_out!r})"
        )
        assert result_out == result_snapshot, (
            f"Case {case['id']!r}: result_data was modified for a passing case"
        )
    else:
        assert success_out is False, (
            f"Case {case['id']!r}: expected validator to reject but got "
            f"success={success_out}, msg={message_out!r}"
        )
        # A None message should fail this assertion with the explanation
        # below rather than crash with AttributeError on ``.lower()``.
        assert "malformed" in (message_out or "").lower(), (
            f"Case {case['id']!r}: rejection message should mention 'malformed', "
            f"got {message_out!r}"
        )
        assert result_out is None, (
            f"Case {case['id']!r}: rejected case should null result_data, "
            f"got {result_out!r}"
        )