AIDA / evals /test_classify_intent.py
destinyebuka's picture
new setup
7cd10a9
"""
LLM classifier eval — opt-in, costs API calls.

Run before swapping the brain provider or after touching the
classification prompt:

    pytest evals/test_classify_intent.py -m llm

Without the `-m llm` filter the cases are skipped, so this file is safe
to leave in CI on every commit.
"""
import os
import pytest
from app.ai.agent.agent_hub import classify_intent
from evals.harness import load_cases, make_state
# Eval cases, loaded once at import time from classify_intent.yaml.
CASES = load_cases("classify_intent.yaml")

# Module-wide marks: tag every test here as `llm`, and skip the whole module
# when none of the brain-provider API keys is set to a non-empty value.
pytestmark = [
    pytest.mark.llm,
    pytest.mark.skipif(
        not any(
            os.getenv(key_name)
            for key_name in ("DEEPSEEK_API_KEY", "GEMINI_API_KEY", "MIMO_API_KEY")
        ),
        reason="No brain LLM API key set; skipping LLM classifier eval.",
    ),
]
@pytest.mark.parametrize("case", CASES, ids=[entry["id"] for entry in CASES])
@pytest.mark.asyncio
async def test_classify_intent(case):
    """Check that ``classify_intent`` returns the value the case expects.

    ``case`` is one entry of ``CASES``. The keys ``id``, ``user_role``,
    ``message`` and ``expected`` are read directly; ``current_agent`` and
    ``note`` are optional and read with ``.get``.
    """
    user_role = case["user_role"]
    message = case["message"]
    current_agent = case.get("current_agent")

    # The same message and agent are fed to both the state and the classifier.
    state = make_state(
        user_role=user_role,
        last_user_message=message,
        active_agent=current_agent,
    )
    result = await classify_intent(
        message=message,
        state=state,
        current_agent=current_agent,
    )

    expected = case["expected"]
    # The failure message is only built when the comparison fails.
    assert result == expected, (
        f"Case {case['id']!r} expected {expected!r} got {result!r}. "
        f"Message: {message!r}. Note: {case.get('note', '')}"
    )