Commit
Β·
97c317b
1
Parent(s):
10e320d
fix: address CodeRabbit review feedback
Browse filesSPEC_02 (CRITICAL):
- Update code examples to match actual implementation
- Replace non-existent create_test_orchestrator() with real pattern
- Show actual pytest fixtures (mock_search_handler, mock_judge_handler)
- Change "Files to Create" → "Files Created"
orchestrator_magentic.py (nitpick):
- Make timeout configurable via constructor parameter
- timeout_seconds defaults to 300.0 (5 minutes)
All 149 tests passing.
docs/specs/SPEC_02_E2E_TESTING.md
CHANGED
|
@@ -18,13 +18,21 @@ We don't know if:
|
|
| 18 |
### Level 1: Smoke Test (Does it run?)
|
| 19 |
|
| 20 |
```python
|
|
|
|
| 21 |
@pytest.mark.e2e
|
| 22 |
-
async def test_simple_mode_completes():
|
| 23 |
"""Verify Simple mode runs without crashing."""
|
| 24 |
from src.orchestrator import Orchestrator
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
events = []
|
| 30 |
async for event in orchestrator.run("test query"):
|
|
@@ -79,23 +87,54 @@ async def test_output_quality():
|
|
| 79 |
|
| 80 |
### Mocking Strategy
|
| 81 |
|
| 82 |
-
For CI/fast tests, mock external APIs
|
| 83 |
|
| 84 |
```python
|
| 85 |
@pytest.fixture
|
| 86 |
-
def
|
| 87 |
-
"""Return
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
)
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
```
|
| 100 |
|
| 101 |
### Integration Tests (Real APIs)
|
|
@@ -152,8 +191,8 @@ tests/
|
|
| 152 |
- #47: E2E Testing - Does Pipeline Actually Generate Useful Reports?
|
| 153 |
- #65: Demo timing (must fix first to make E2E tests practical)
|
| 154 |
|
| 155 |
-
## Files
|
| 156 |
|
| 157 |
-
1. `tests/e2e/conftest.py` - E2E fixtures
|
| 158 |
-
2. `tests/e2e/test_simple_mode.py` - Simple mode tests
|
| 159 |
-
3. `tests/e2e/test_advanced_mode.py` - Advanced mode tests
|
|
|
|
| 18 |
### Level 1: Smoke Test (Does it run?)
|
| 19 |
|
| 20 |
```python
|
| 21 |
+
@pytest.mark.asyncio
|
| 22 |
@pytest.mark.e2e
|
| 23 |
+
async def test_simple_mode_completes(mock_search_handler, mock_judge_handler):
|
| 24 |
"""Verify Simple mode runs without crashing."""
|
| 25 |
from src.orchestrator import Orchestrator
|
| 26 |
+
from src.utils.models import OrchestratorConfig
|
| 27 |
+
|
| 28 |
+
config = OrchestratorConfig(max_iterations=2)
|
| 29 |
+
orchestrator = Orchestrator(
|
| 30 |
+
search_handler=mock_search_handler,
|
| 31 |
+
judge_handler=mock_judge_handler,
|
| 32 |
+
config=config,
|
| 33 |
+
enable_analysis=False,
|
| 34 |
+
enable_embeddings=False,
|
| 35 |
+
)
|
| 36 |
|
| 37 |
events = []
|
| 38 |
async for event in orchestrator.run("test query"):
|
|
|
|
| 87 |
|
| 88 |
### Mocking Strategy
|
| 89 |
|
| 90 |
+
For CI/fast tests, mock external APIs via pytest fixtures in `tests/e2e/conftest.py`:
|
| 91 |
|
| 92 |
```python
|
| 93 |
@pytest.fixture
|
| 94 |
+
def mock_search_handler():
|
| 95 |
+
"""Return a mock search handler that returns fake evidence."""
|
| 96 |
+
from unittest.mock import MagicMock
|
| 97 |
+
from src.utils.models import Citation, Evidence, SearchResult
|
| 98 |
+
|
| 99 |
+
async def mock_execute(query: str):
|
| 100 |
+
return SearchResult(
|
| 101 |
+
evidence=[
|
| 102 |
+
Evidence(
|
| 103 |
+
content="Study on test query showing positive results...",
|
| 104 |
+
citation=Citation(
|
| 105 |
+
source="pubmed",
|
| 106 |
+
title="Study on test query",
|
| 107 |
+
url="https://pubmed.example.com/123",
|
| 108 |
+
date="2024",
|
| 109 |
+
),
|
| 110 |
+
)
|
| 111 |
+
],
|
| 112 |
+
sources_searched=["pubmed", "clinicaltrials"],
|
| 113 |
)
|
| 114 |
+
|
| 115 |
+
mock = MagicMock()
|
| 116 |
+
mock.execute = mock_execute
|
| 117 |
+
return mock
|
| 118 |
+
|
| 119 |
+
@pytest.fixture
|
| 120 |
+
def mock_judge_handler():
|
| 121 |
+
"""Return a mock judge that always says 'synthesize'."""
|
| 122 |
+
from unittest.mock import MagicMock
|
| 123 |
+
from src.utils.models import JudgeAssessment
|
| 124 |
+
|
| 125 |
+
async def mock_assess(evidence, query):
|
| 126 |
+
return JudgeAssessment(
|
| 127 |
+
sufficient=True,
|
| 128 |
+
reasoning="Mock: Evidence is sufficient",
|
| 129 |
+
suggested_refinements=[],
|
| 130 |
+
key_findings=["Finding 1", "Finding 2"],
|
| 131 |
+
evidence_gaps=[],
|
| 132 |
+
recommended_drugs=["MockDrug A", "MockDrug B"],
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
+
mock = MagicMock()
|
| 136 |
+
mock.assess = mock_assess
|
| 137 |
+
return mock
|
| 138 |
```
|
| 139 |
|
| 140 |
### Integration Tests (Real APIs)
|
|
|
|
| 191 |
- #47: E2E Testing - Does Pipeline Actually Generate Useful Reports?
|
| 192 |
- #65: Demo timing (must fix first to make E2E tests practical)
|
| 193 |
|
| 194 |
+
## Files Created
|
| 195 |
|
| 196 |
+
1. `tests/e2e/conftest.py` - E2E fixtures (mock_search_handler, mock_judge_handler)
|
| 197 |
+
2. `tests/e2e/test_simple_mode.py` - Simple mode tests (2 tests)
|
| 198 |
+
3. `tests/e2e/test_advanced_mode.py` - Advanced mode tests (1 test, mocked workflow)
|
src/orchestrator_magentic.py
CHANGED
|
@@ -45,6 +45,7 @@ class MagenticOrchestrator:
|
|
| 45 |
max_rounds: int = 10,
|
| 46 |
chat_client: OpenAIChatClient | None = None,
|
| 47 |
api_key: str | None = None,
|
|
|
|
| 48 |
) -> None:
|
| 49 |
"""Initialize orchestrator.
|
| 50 |
|
|
@@ -52,12 +53,14 @@ class MagenticOrchestrator:
|
|
| 52 |
max_rounds: Maximum coordination rounds
|
| 53 |
chat_client: Optional shared chat client for agents
|
| 54 |
api_key: Optional OpenAI API key (for BYOK)
|
|
|
|
| 55 |
"""
|
| 56 |
# Validate requirements only if no key provided
|
| 57 |
if not chat_client and not api_key:
|
| 58 |
check_magentic_requirements()
|
| 59 |
|
| 60 |
self._max_rounds = max_rounds
|
|
|
|
| 61 |
self._chat_client: OpenAIChatClient | None
|
| 62 |
|
| 63 |
if chat_client:
|
|
@@ -170,10 +173,9 @@ The final output should be a structured research report."""
|
|
| 170 |
|
| 171 |
iteration = 0
|
| 172 |
final_event_received = False
|
| 173 |
-
demo_timeout_seconds = 300 # 5 minutes max
|
| 174 |
|
| 175 |
try:
|
| 176 |
-
async with asyncio.timeout(
|
| 177 |
async for event in workflow.run_stream(task):
|
| 178 |
agent_event = self._process_event(event, iteration)
|
| 179 |
if agent_event:
|
|
|
|
| 45 |
max_rounds: int = 10,
|
| 46 |
chat_client: OpenAIChatClient | None = None,
|
| 47 |
api_key: str | None = None,
|
| 48 |
+
timeout_seconds: float = 300.0,
|
| 49 |
) -> None:
|
| 50 |
"""Initialize orchestrator.
|
| 51 |
|
|
|
|
| 53 |
max_rounds: Maximum coordination rounds
|
| 54 |
chat_client: Optional shared chat client for agents
|
| 55 |
api_key: Optional OpenAI API key (for BYOK)
|
| 56 |
+
timeout_seconds: Maximum workflow duration (default: 5 minutes)
|
| 57 |
"""
|
| 58 |
# Validate requirements only if no key provided
|
| 59 |
if not chat_client and not api_key:
|
| 60 |
check_magentic_requirements()
|
| 61 |
|
| 62 |
self._max_rounds = max_rounds
|
| 63 |
+
self._timeout_seconds = timeout_seconds
|
| 64 |
self._chat_client: OpenAIChatClient | None
|
| 65 |
|
| 66 |
if chat_client:
|
|
|
|
| 173 |
|
| 174 |
iteration = 0
|
| 175 |
final_event_received = False
|
|
|
|
| 176 |
|
| 177 |
try:
|
| 178 |
+
async with asyncio.timeout(self._timeout_seconds):
|
| 179 |
async for event in workflow.run_stream(task):
|
| 180 |
agent_event = self._process_event(event, iteration)
|
| 181 |
if agent_event:
|