""" LLM classifier eval — opt-in, costs API calls. Run before swapping the brain provider or after touching the classification prompt: pytest evals/test_classify_intent.py -m llm Without the -m llm filter the cases are skipped, so this file is safe to leave in CI on every commit. """ import os import pytest from app.ai.agent.agent_hub import classify_intent from evals.harness import load_cases, make_state CASES = load_cases("classify_intent.yaml") pytestmark = [ pytest.mark.llm, pytest.mark.skipif( not (os.getenv("DEEPSEEK_API_KEY") or os.getenv("GEMINI_API_KEY") or os.getenv("MIMO_API_KEY")), reason="No brain LLM API key set; skipping LLM classifier eval.", ), ] @pytest.mark.parametrize("case", CASES, ids=[c["id"] for c in CASES]) @pytest.mark.asyncio async def test_classify_intent(case): state = make_state( user_role=case["user_role"], last_user_message=case["message"], active_agent=case.get("current_agent"), ) result = await classify_intent( message=case["message"], state=state, current_agent=case.get("current_agent"), ) assert result == case["expected"], ( f"Case {case['id']!r} expected {case['expected']!r} got {result!r}. " f"Message: {case['message']!r}. Note: {case.get('note', '')}" )