Commit
Β·
97c317b
1
Parent(s):
10e320d
fix: address CodeRabbit review feedback
Browse filesSPEC_02 (CRITICAL):
- Update code examples to match actual implementation
- Replace non-existent create_test_orchestrator() with real pattern
- Show actual pytest fixtures (mock_search_handler, mock_judge_handler)
- Change "Files to Create" → "Files Created"
orchestrator_magentic.py (nitpick):
- Make timeout configurable via constructor parameter
- timeout_seconds defaults to 300.0 (5 minutes)
All 149 tests passing.
docs/specs/SPEC_02_E2E_TESTING.md
CHANGED
|
@@ -18,13 +18,21 @@ We don't know if:
|
|
| 18 |
### Level 1: Smoke Test (Does it run?)
|
| 19 |
|
| 20 |
```python
|
|
|
|
| 21 |
@pytest.mark.e2e
|
| 22 |
-
async def test_simple_mode_completes():
|
| 23 |
"""Verify Simple mode runs without crashing."""
|
| 24 |
from src.orchestrator import Orchestrator
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
events = []
|
| 30 |
async for event in orchestrator.run("test query"):
|
|
@@ -79,23 +87,54 @@ async def test_output_quality():
|
|
| 79 |
|
| 80 |
### Mocking Strategy
|
| 81 |
|
| 82 |
-
For CI/fast tests, mock external APIs
|
| 83 |
|
| 84 |
```python
|
| 85 |
@pytest.fixture
|
| 86 |
-
def
|
| 87 |
-
"""Return
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
)
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
```
|
| 100 |
|
| 101 |
### Integration Tests (Real APIs)
|
|
@@ -152,8 +191,8 @@ tests/
|
|
| 152 |
- #47: E2E Testing - Does Pipeline Actually Generate Useful Reports?
|
| 153 |
- #65: Demo timing (must fix first to make E2E tests practical)
|
| 154 |
|
| 155 |
-
## Files
|
| 156 |
|
| 157 |
-
1. `tests/e2e/conftest.py` - E2E fixtures
|
| 158 |
-
2. `tests/e2e/test_simple_mode.py` - Simple mode tests
|
| 159 |
-
3. `tests/e2e/test_advanced_mode.py` - Advanced mode tests
|
|
|
|
| 18 |
### Level 1: Smoke Test (Does it run?)
|
| 19 |
|
| 20 |
```python
|
| 21 |
+
@pytest.mark.asyncio
|
| 22 |
@pytest.mark.e2e
|
| 23 |
+
async def test_simple_mode_completes(mock_search_handler, mock_judge_handler):
|
| 24 |
"""Verify Simple mode runs without crashing."""
|
| 25 |
from src.orchestrator import Orchestrator
|
| 26 |
+
from src.utils.models import OrchestratorConfig
|
| 27 |
+
|
| 28 |
+
config = OrchestratorConfig(max_iterations=2)
|
| 29 |
+
orchestrator = Orchestrator(
|
| 30 |
+
search_handler=mock_search_handler,
|
| 31 |
+
judge_handler=mock_judge_handler,
|
| 32 |
+
config=config,
|
| 33 |
+
enable_analysis=False,
|
| 34 |
+
enable_embeddings=False,
|
| 35 |
+
)
|
| 36 |
|
| 37 |
events = []
|
| 38 |
async for event in orchestrator.run("test query"):
|
|
|
|
| 87 |
|
| 88 |
### Mocking Strategy
|
| 89 |
|
| 90 |
+
For CI/fast tests, mock external APIs via pytest fixtures in `tests/e2e/conftest.py`:
|
| 91 |
|
| 92 |
```python
|
| 93 |
@pytest.fixture
|
| 94 |
+
def mock_search_handler():
|
| 95 |
+
"""Return a mock search handler that returns fake evidence."""
|
| 96 |
+
from unittest.mock import MagicMock
|
| 97 |
+
from src.utils.models import Citation, Evidence, SearchResult
|
| 98 |
+
|
| 99 |
+
async def mock_execute(query: str):
|
| 100 |
+
return SearchResult(
|
| 101 |
+
evidence=[
|
| 102 |
+
Evidence(
|
| 103 |
+
content="Study on test query showing positive results...",
|
| 104 |
+
citation=Citation(
|
| 105 |
+
source="pubmed",
|
| 106 |
+
title="Study on test query",
|
| 107 |
+
url="https://pubmed.example.com/123",
|
| 108 |
+
date="2024",
|
| 109 |
+
),
|
| 110 |
+
)
|
| 111 |
+
],
|
| 112 |
+
sources_searched=["pubmed", "clinicaltrials"],
|
| 113 |
)
|
| 114 |
+
|
| 115 |
+
mock = MagicMock()
|
| 116 |
+
mock.execute = mock_execute
|
| 117 |
+
return mock
|
| 118 |
+
|
| 119 |
+
@pytest.fixture
|
| 120 |
+
def mock_judge_handler():
|
| 121 |
+
"""Return a mock judge that always says 'synthesize'."""
|
| 122 |
+
from unittest.mock import MagicMock
|
| 123 |
+
from src.utils.models import JudgeAssessment
|
| 124 |
+
|
| 125 |
+
async def mock_assess(evidence, query):
|
| 126 |
+
return JudgeAssessment(
|
| 127 |
+
sufficient=True,
|
| 128 |
+
reasoning="Mock: Evidence is sufficient",
|
| 129 |
+
suggested_refinements=[],
|
| 130 |
+
key_findings=["Finding 1", "Finding 2"],
|
| 131 |
+
evidence_gaps=[],
|
| 132 |
+
recommended_drugs=["MockDrug A", "MockDrug B"],
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
+
mock = MagicMock()
|
| 136 |
+
mock.assess = mock_assess
|
| 137 |
+
return mock
|
| 138 |
```
|
| 139 |
|
| 140 |
### Integration Tests (Real APIs)
|
|
|
|
| 191 |
- #47: E2E Testing - Does Pipeline Actually Generate Useful Reports?
|
| 192 |
- #65: Demo timing (must fix first to make E2E tests practical)
|
| 193 |
|
| 194 |
+
## Files Created
|
| 195 |
|
| 196 |
+
1. `tests/e2e/conftest.py` - E2E fixtures (mock_search_handler, mock_judge_handler)
|
| 197 |
+
2. `tests/e2e/test_simple_mode.py` - Simple mode tests (2 tests)
|
| 198 |
+
3. `tests/e2e/test_advanced_mode.py` - Advanced mode tests (1 test, mocked workflow)
|
src/orchestrator_magentic.py
CHANGED
|
@@ -45,6 +45,7 @@ class MagenticOrchestrator:
|
|
| 45 |
max_rounds: int = 10,
|
| 46 |
chat_client: OpenAIChatClient | None = None,
|
| 47 |
api_key: str | None = None,
|
|
|
|
| 48 |
) -> None:
|
| 49 |
"""Initialize orchestrator.
|
| 50 |
|
|
@@ -52,12 +53,14 @@ class MagenticOrchestrator:
|
|
| 52 |
max_rounds: Maximum coordination rounds
|
| 53 |
chat_client: Optional shared chat client for agents
|
| 54 |
api_key: Optional OpenAI API key (for BYOK)
|
|
|
|
| 55 |
"""
|
| 56 |
# Validate requirements only if no key provided
|
| 57 |
if not chat_client and not api_key:
|
| 58 |
check_magentic_requirements()
|
| 59 |
|
| 60 |
self._max_rounds = max_rounds
|
|
|
|
| 61 |
self._chat_client: OpenAIChatClient | None
|
| 62 |
|
| 63 |
if chat_client:
|
|
@@ -170,10 +173,9 @@ The final output should be a structured research report."""
|
|
| 170 |
|
| 171 |
iteration = 0
|
| 172 |
final_event_received = False
|
| 173 |
-
demo_timeout_seconds = 300 # 5 minutes max
|
| 174 |
|
| 175 |
try:
|
| 176 |
-
async with asyncio.timeout(
|
| 177 |
async for event in workflow.run_stream(task):
|
| 178 |
agent_event = self._process_event(event, iteration)
|
| 179 |
if agent_event:
|
|
|
|
| 45 |
max_rounds: int = 10,
|
| 46 |
chat_client: OpenAIChatClient | None = None,
|
| 47 |
api_key: str | None = None,
|
| 48 |
+
timeout_seconds: float = 300.0,
|
| 49 |
) -> None:
|
| 50 |
"""Initialize orchestrator.
|
| 51 |
|
|
|
|
| 53 |
max_rounds: Maximum coordination rounds
|
| 54 |
chat_client: Optional shared chat client for agents
|
| 55 |
api_key: Optional OpenAI API key (for BYOK)
|
| 56 |
+
timeout_seconds: Maximum workflow duration (default: 5 minutes)
|
| 57 |
"""
|
| 58 |
# Validate requirements only if no key provided
|
| 59 |
if not chat_client and not api_key:
|
| 60 |
check_magentic_requirements()
|
| 61 |
|
| 62 |
self._max_rounds = max_rounds
|
| 63 |
+
self._timeout_seconds = timeout_seconds
|
| 64 |
self._chat_client: OpenAIChatClient | None
|
| 65 |
|
| 66 |
if chat_client:
|
|
|
|
| 173 |
|
| 174 |
iteration = 0
|
| 175 |
final_event_received = False
|
|
|
|
| 176 |
|
| 177 |
try:
|
| 178 |
+
async with asyncio.timeout(self._timeout_seconds):
|
| 179 |
async for event in workflow.run_stream(task):
|
| 180 |
agent_event = self._process_event(event, iteration)
|
| 181 |
if agent_event:
|