from unittest.mock import AsyncMock

import pytest

from src.orchestrators.simple import Orchestrator
from src.utils.models import (
    AssessmentDetails,
    Citation,
    Evidence,
    JudgeAssessment,
    OrchestratorConfig,
    SearchResult,
)


def make_evidence(title: str) -> Evidence:
    return Evidence(
        content="content",
        citation=Citation(title=title, url="http://test.com", date="2025", source="pubmed"),
    )


@pytest.mark.integration
@pytest.mark.asyncio
async def test_simple_mode_synthesizes_before_max_iterations():
    """Verify simple mode produces useful output with a mocked judge."""
    # Mock search to return evidence
    mock_search = AsyncMock()
    mock_search.execute.return_value = SearchResult(
        query="test query",
        evidence=[make_evidence(f"Paper {i}") for i in range(5)],
        errors=[],
        sources_searched=["pubmed"],
        total_found=5,
    )

    # Mock judge to eventually return good scores.
    # We could use MockJudgeHandler or a pure mock; a pure mock lets us control scores precisely.
    mock_judge = AsyncMock()

    # Because mock_judge exposes a 'synthesize' attribute by default (it is a Mock),
    # simple mode takes the free-tier path. Mock the return value of synthesize
    # to simulate a successful narrative generation.
    mock_judge.synthesize.return_value = "This is a synthesized report for MagicDrug."

    # Iteration 1: low scores
    assess_1 = JudgeAssessment(
        details=AssessmentDetails(
            mechanism_score=2,
            mechanism_reasoning="reasoning is sufficient for valid model",
            clinical_evidence_score=2,
            clinical_reasoning="reasoning is sufficient for valid model",
            drug_candidates=[],
            key_findings=[],
        ),
        sufficient=False,
        confidence=0.5,
        recommendation="continue",
        next_search_queries=["q2"],
        reasoning="need more evidence to support conclusions about this topic",
    )

    # Iteration 2: high scores (should trigger synthesis)
    assess_2 = JudgeAssessment(
        details=AssessmentDetails(
            mechanism_score=8,
            mechanism_reasoning="reasoning is sufficient for valid model",
            clinical_evidence_score=7,
            clinical_reasoning="reasoning is sufficient for valid model",
            drug_candidates=["MagicDrug"],
            key_findings=["It works"],
        ),
        sufficient=False,  # Judge is conservative
        confidence=0.9,
        recommendation="continue",  # Judge still says continue (simulating bias)
        next_search_queries=[],
        reasoning="good scores but maybe more evidence needed technically",
    )
    mock_judge.assess.side_effect = [assess_1, assess_2]

    orchestrator = Orchestrator(
        search_handler=mock_search,
        judge_handler=mock_judge,
        config=OrchestratorConfig(max_iterations=5),
    )

    events = []
    async for event in orchestrator.run("test query"):
        events.append(event)
        if event.type == "complete":
            break

    # Must have synthesis with drug candidates
    complete_events = [e for e in events if e.type == "complete"]
    assert len(complete_events) == 1
    complete_event = complete_events[0]
    assert "MagicDrug" in complete_event.message

    # SPEC_12: LLM synthesis produces narrative prose, not a template with a "Drug Candidates" header.
    # Check for narrative structure (the LLM may omit the ### prefix) OR the template fallback.
    assert (
        "Executive Summary" in complete_event.message
        or "Drug Candidates" in complete_event.message
        or "synthesized report" in complete_event.message
    )
    assert complete_event.data.get("synthesis_reason") == "high_scores_with_candidates"
    assert complete_event.iteration == 2  # Should stop at iteration 2


@pytest.mark.integration
@pytest.mark.asyncio
async def test_partial_synthesis_generation():
    """Verify partial synthesis includes drug candidates even if max iterations are reached."""
    mock_search = AsyncMock()
    mock_search.execute.return_value = SearchResult(
        query="test",
        evidence=[],
errors=[], sources_searched=["pubmed"], total_found=0 ) mock_judge = AsyncMock() # Always return low scores but WITH candidates # Scores 3+3 = 6 < 8 (late threshold), so it should NOT synthesize early mock_judge.assess.return_value = JudgeAssessment( details=AssessmentDetails( mechanism_score=3, mechanism_reasoning="reasoning is sufficient for valid model", clinical_evidence_score=3, clinical_reasoning="reasoning is sufficient for valid model", drug_candidates=["PartialDrug"], key_findings=["Partial finding"], ), sufficient=False, confidence=0.5, recommendation="continue", next_search_queries=[], reasoning="keep going to find more evidence about this topic please", ) orchestrator = Orchestrator( search_handler=mock_search, judge_handler=mock_judge, config=OrchestratorConfig(max_iterations=2), ) events = [] async for event in orchestrator.run("test"): events.append(event) complete_events = [e for e in events if e.type == "complete"] assert len(complete_events) == 1, ( f"Expected exactly one complete event, got {len(complete_events)}" ) complete_event = complete_events[0] assert complete_event.data.get("max_reached") is True # The output message should contain the drug candidate from the last assessment assert "PartialDrug" in complete_event.message assert "Maximum iterations reached" in complete_event.message