Commit
·
a31cea6
1
Parent(s):
19e1e93
test(agents): verify judge termination prompt for SPEC-15
Browse files
tests/unit/agents/test_magentic_judge_termination.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for Magentic Judge termination logic."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import patch
|
| 4 |
+
|
| 5 |
+
from src.agents.magentic_agents import create_judge_agent
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def test_judge_agent_has_termination_instructions() -> None:
    """The judge's instructions must contain the explicit early-termination rules."""
    required_phrases = (
        "CRITICAL OUTPUT FORMAT",
        "SUFFICIENT EVIDENCE",
        "confidence >= 70%",
        "STOP SEARCHING",
        "Delegate to ReportAgent NOW",
    )

    with patch("src.agents.magentic_agents.get_domain_config") as domain_config, \
            patch("src.agents.magentic_agents.ChatAgent") as chat_agent_cls, \
            patch("src.agents.magentic_agents.settings") as fake_settings:
        # An empty domain prompt leaves only the hardcoded critical section under test.
        domain_config.return_value.judge_system_prompt = ""
        fake_settings.openai_api_key = "sk-dummy"
        fake_settings.openai_model = "gpt-4"

        create_judge_agent()

    # The mock keeps its call record after the patches have been undone.
    assert chat_agent_cls.called
    instructions = chat_agent_cls.call_args.kwargs.get("instructions", "")

    # Each phrase is checked in order; the first missing one fails the test.
    for phrase in required_phrases:
        assert phrase in instructions
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_judge_agent_uses_reasoning_temperature() -> None:
    """Judge agent should be initialized with temperature=1.0."""
    with patch("src.agents.magentic_agents.ChatAgent") as mock_chat_agent_cls:
        with patch("src.agents.magentic_agents.settings") as mock_settings:
            # Placeholder values on the patched settings object.
            mock_settings.openai_api_key = "sk-dummy"
            mock_settings.openai_model = "gpt-4"

            create_judge_agent()

            # Guard first: if ChatAgent was never constructed, call_args is None
            # and the lookup below would raise AttributeError instead of
            # failing with a clear assertion.
            assert mock_chat_agent_cls.called
            call_kwargs = mock_chat_agent_cls.call_args.kwargs
            assert call_kwargs.get("temperature") == 1.0
|