VibecoderMcSwaggins commited on
Commit
97c317b
Β·
1 Parent(s): 10e320d

fix: address CodeRabbit review feedback

Browse files

SPEC_02 (CRITICAL):
- Update code examples to match actual implementation
- Replace non-existent create_test_orchestrator() with real pattern
- Show actual pytest fixtures (mock_search_handler, mock_judge_handler)
- Change "Files to Create" β†’ "Files Created"

orchestrator_magentic.py (nitpick):
- Make timeout configurable via constructor parameter
- timeout_seconds defaults to 300.0 (5 minutes)

All 149 tests passing.

docs/specs/SPEC_02_E2E_TESTING.md CHANGED
@@ -18,13 +18,21 @@ We don't know if:
18
  ### Level 1: Smoke Test (Does it run?)
19
 
20
  ```python
 
21
  @pytest.mark.e2e
22
- async def test_simple_mode_completes():
23
  """Verify Simple mode runs without crashing."""
24
  from src.orchestrator import Orchestrator
25
-
26
- # Mock the search tools to avoid real API calls
27
- orchestrator = create_test_orchestrator(mode="simple")
 
 
 
 
 
 
 
28
 
29
  events = []
30
  async for event in orchestrator.run("test query"):
@@ -79,23 +87,54 @@ async def test_output_quality():
79
 
80
  ### Mocking Strategy
81
 
82
- For CI/fast tests, mock external APIs:
83
 
84
  ```python
85
  @pytest.fixture
86
- def mock_pubmed():
87
- """Return realistic but fake PubMed results."""
88
- return [
89
- Evidence(
90
- content="Metformin improves insulin sensitivity...",
91
- citation=Citation(
92
- source="pubmed",
93
- title="Metformin in PCOS: A Meta-Analysis",
94
- url="https://pubmed.ncbi.nlm.nih.gov/12345678/",
95
- date="2024",
96
- )
 
 
 
 
 
 
 
 
97
  )
98
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  ```
100
 
101
  ### Integration Tests (Real APIs)
@@ -152,8 +191,8 @@ tests/
152
  - #47: E2E Testing - Does Pipeline Actually Generate Useful Reports?
153
  - #65: Demo timing (must fix first to make E2E tests practical)
154
 
155
- ## Files to Create
156
 
157
- 1. `tests/e2e/conftest.py` - E2E fixtures and mocks
158
- 2. `tests/e2e/test_simple_mode.py` - Simple mode tests
159
- 3. `tests/e2e/test_advanced_mode.py` - Advanced mode tests
 
18
  ### Level 1: Smoke Test (Does it run?)
19
 
20
  ```python
21
+ @pytest.mark.asyncio
22
  @pytest.mark.e2e
23
+ async def test_simple_mode_completes(mock_search_handler, mock_judge_handler):
24
  """Verify Simple mode runs without crashing."""
25
  from src.orchestrator import Orchestrator
26
+ from src.utils.models import OrchestratorConfig
27
+
28
+ config = OrchestratorConfig(max_iterations=2)
29
+ orchestrator = Orchestrator(
30
+ search_handler=mock_search_handler,
31
+ judge_handler=mock_judge_handler,
32
+ config=config,
33
+ enable_analysis=False,
34
+ enable_embeddings=False,
35
+ )
36
 
37
  events = []
38
  async for event in orchestrator.run("test query"):
 
87
 
88
  ### Mocking Strategy
89
 
90
+ For CI/fast tests, mock external APIs via pytest fixtures in `tests/e2e/conftest.py`:
91
 
92
  ```python
93
  @pytest.fixture
94
+ def mock_search_handler():
95
+ """Return a mock search handler that returns fake evidence."""
96
+ from unittest.mock import MagicMock
97
+ from src.utils.models import Citation, Evidence, SearchResult
98
+
99
+ async def mock_execute(query: str):
100
+ return SearchResult(
101
+ evidence=[
102
+ Evidence(
103
+ content="Study on test query showing positive results...",
104
+ citation=Citation(
105
+ source="pubmed",
106
+ title="Study on test query",
107
+ url="https://pubmed.example.com/123",
108
+ date="2024",
109
+ ),
110
+ )
111
+ ],
112
+ sources_searched=["pubmed", "clinicaltrials"],
113
  )
114
+
115
+ mock = MagicMock()
116
+ mock.execute = mock_execute
117
+ return mock
118
+
119
+ @pytest.fixture
120
+ def mock_judge_handler():
121
+ """Return a mock judge that always says 'synthesize'."""
122
+ from unittest.mock import MagicMock
123
+ from src.utils.models import JudgeAssessment
124
+
125
+ async def mock_assess(evidence, query):
126
+ return JudgeAssessment(
127
+ sufficient=True,
128
+ reasoning="Mock: Evidence is sufficient",
129
+ suggested_refinements=[],
130
+ key_findings=["Finding 1", "Finding 2"],
131
+ evidence_gaps=[],
132
+ recommended_drugs=["MockDrug A", "MockDrug B"],
133
+ )
134
+
135
+ mock = MagicMock()
136
+ mock.assess = mock_assess
137
+ return mock
138
  ```
139
 
140
  ### Integration Tests (Real APIs)
 
191
  - #47: E2E Testing - Does Pipeline Actually Generate Useful Reports?
192
  - #65: Demo timing (must fix first to make E2E tests practical)
193
 
194
+ ## Files Created
195
 
196
+ 1. `tests/e2e/conftest.py` - E2E fixtures (mock_search_handler, mock_judge_handler)
197
+ 2. `tests/e2e/test_simple_mode.py` - Simple mode tests (2 tests)
198
+ 3. `tests/e2e/test_advanced_mode.py` - Advanced mode tests (1 test, mocked workflow)
src/orchestrator_magentic.py CHANGED
@@ -45,6 +45,7 @@ class MagenticOrchestrator:
45
  max_rounds: int = 10,
46
  chat_client: OpenAIChatClient | None = None,
47
  api_key: str | None = None,
 
48
  ) -> None:
49
  """Initialize orchestrator.
50
 
@@ -52,12 +53,14 @@ class MagenticOrchestrator:
52
  max_rounds: Maximum coordination rounds
53
  chat_client: Optional shared chat client for agents
54
  api_key: Optional OpenAI API key (for BYOK)
 
55
  """
56
  # Validate requirements only if no key provided
57
  if not chat_client and not api_key:
58
  check_magentic_requirements()
59
 
60
  self._max_rounds = max_rounds
 
61
  self._chat_client: OpenAIChatClient | None
62
 
63
  if chat_client:
@@ -170,10 +173,9 @@ The final output should be a structured research report."""
170
 
171
  iteration = 0
172
  final_event_received = False
173
- demo_timeout_seconds = 300 # 5 minutes max
174
 
175
  try:
176
- async with asyncio.timeout(demo_timeout_seconds):
177
  async for event in workflow.run_stream(task):
178
  agent_event = self._process_event(event, iteration)
179
  if agent_event:
 
45
  max_rounds: int = 10,
46
  chat_client: OpenAIChatClient | None = None,
47
  api_key: str | None = None,
48
+ timeout_seconds: float = 300.0,
49
  ) -> None:
50
  """Initialize orchestrator.
51
 
 
53
  max_rounds: Maximum coordination rounds
54
  chat_client: Optional shared chat client for agents
55
  api_key: Optional OpenAI API key (for BYOK)
56
+ timeout_seconds: Maximum workflow duration (default: 5 minutes)
57
  """
58
  # Validate requirements only if no key provided
59
  if not chat_client and not api_key:
60
  check_magentic_requirements()
61
 
62
  self._max_rounds = max_rounds
63
+ self._timeout_seconds = timeout_seconds
64
  self._chat_client: OpenAIChatClient | None
65
 
66
  if chat_client:
 
173
 
174
  iteration = 0
175
  final_event_received = False
 
176
 
177
  try:
178
+ async with asyncio.timeout(self._timeout_seconds):
179
  async for event in workflow.run_stream(task):
180
  agent_event = self._process_event(event, iteration)
181
  if agent_event: