VibecoderMcSwaggins committed on
Commit
a31cea6
·
1 Parent(s): 19e1e93

test(agents): verify judge termination prompt for SPEC-15

Browse files
tests/unit/agents/test_magentic_judge_termination.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for Magentic Judge termination logic."""
2
+
3
+ from unittest.mock import patch
4
+
5
+ from src.agents.magentic_agents import create_judge_agent
6
+
7
+
8
def test_judge_agent_has_termination_instructions() -> None:
    """Check that the judge's ChatAgent is built with the early-termination directives."""
    # Phrases that must appear verbatim in the instructions passed to ChatAgent.
    required_phrases = (
        "CRITICAL OUTPUT FORMAT",
        "SUFFICIENT EVIDENCE",
        "confidence >= 70%",
        "STOP SEARCHING",
        "Delegate to ReportAgent NOW",
    )

    with patch("src.agents.magentic_agents.get_domain_config") as domain_config, \
            patch("src.agents.magentic_agents.ChatAgent") as chat_agent_cls, \
            patch("src.agents.magentic_agents.settings") as fake_settings:
        # Blank out the domain-supplied judge prompt so the phrases checked
        # below cannot originate from the domain configuration.
        domain_config.return_value.judge_system_prompt = ""
        fake_settings.openai_api_key = "sk-dummy"
        fake_settings.openai_model = "gpt-4"

        create_judge_agent()

    # The mocks keep their recorded calls after the patches are undone.
    assert chat_agent_cls.called
    instructions = chat_agent_cls.call_args.kwargs.get("instructions", "")

    for phrase in required_phrases:
        assert phrase in instructions
32
+
33
+
34
def test_judge_agent_uses_reasoning_temperature() -> None:
    """Judge agent should be initialized with temperature=1.0."""
    with patch("src.agents.magentic_agents.ChatAgent") as mock_chat_agent_cls, \
            patch("src.agents.magentic_agents.settings") as mock_settings:
        # Dummy credentials/model so agent creation does not depend on real settings.
        mock_settings.openai_api_key = "sk-dummy"
        mock_settings.openai_model = "gpt-4"

        create_judge_agent()

    # Guard before touching call_args: it is None when the mock was never
    # called, which would surface as an opaque AttributeError instead of a
    # clear assertion failure.
    assert mock_chat_agent_cls.called
    call_kwargs = mock_chat_agent_cls.call_args.kwargs
    assert call_kwargs.get("temperature") == 1.0