Spaces:
Running
Running
| """ | |
| LLM classifier eval — opt-in, costs API calls. | |
| Run before swapping the brain provider or after touching the | |
| classification prompt: | |
| pytest evals/test_classify_intent.py -m llm | |
| Without the -m llm filter the cases are skipped, so this file is safe | |
| to leave in CI on every commit. | |
| """ | |
| import os | |
| import pytest | |
| from app.ai.agent.agent_hub import classify_intent | |
| from evals.harness import load_cases, make_state | |
# Eval cases for this module, obtained from the harness loader at import time.
CASES = load_cases("classify_intent.yaml")

# Module-wide marks applied to every test in this file:
#   * `llm`    - lets `pytest -m llm` select these API-calling cases.
#   * `skipif` - skips them when none of the listed API-key environment
#                variables is set to a non-empty value.
pytestmark = [
    pytest.mark.llm,
    pytest.mark.skipif(
        not any(
            os.getenv(key_name)
            for key_name in ("DEEPSEEK_API_KEY", "GEMINI_API_KEY", "MIMO_API_KEY")
        ),
        reason="No brain LLM API key set; skipping LLM classifier eval.",
    ),
]
async def test_classify_intent(case):
    """Check that ``classify_intent`` returns the expected value for one case.

    Builds a state with ``make_state`` from the case's role, message and
    (optional) current agent, awaits ``classify_intent`` with the same
    message and agent, and asserts the result equals ``case["expected"]``.

    Args:
        case: Mapping with the keys ``id``, ``user_role``, ``message`` and
            ``expected``; ``current_agent`` and ``note`` are optional.
            NOTE(review): no parametrization is visible in this file, so
            ``case`` is presumably injected elsewhere (e.g. a conftest
            hook) - confirm. Likewise, running an ``async def`` test
            presumably relies on an async pytest plugin configured
            outside this file.
    """
    role = case["user_role"]
    text = case["message"]
    agent = case.get("current_agent")

    state = make_state(user_role=role, last_user_message=text, active_agent=agent)
    result = await classify_intent(message=text, state=state, current_agent=agent)

    # The expected value is looked up only after the classifier call.
    expected = case["expected"]
    assert result == expected, (
        f"Case {case['id']!r} expected {case['expected']!r} got {result!r}. "
        f"Message: {case['message']!r}. Note: {case.get('note', '')}"
    )