"""Tests for simple orchestrator LLM synthesis.""" from unittest.mock import AsyncMock, MagicMock, patch import pytest from src.orchestrators.simple import Orchestrator from src.utils.models import AssessmentDetails, Citation, Evidence, JudgeAssessment @pytest.fixture def sample_evidence() -> list[Evidence]: """Sample evidence for testing synthesis.""" return [ Evidence( content="Testosterone therapy demonstrates efficacy in treating HSDD.", citation=Citation( source="pubmed", title="Testosterone and Female Sexual Desire", url="https://pubmed.ncbi.nlm.nih.gov/12345/", date="2023", authors=["Smith J", "Jones A"], ), ), Evidence( content="A meta-analysis of 8 RCTs shows significant improvement in sexual desire.", citation=Citation( source="pubmed", title="Meta-analysis of Testosterone Therapy", url="https://pubmed.ncbi.nlm.nih.gov/67890/", date="2024", authors=["Johnson B"], ), ), ] @pytest.fixture def sample_assessment() -> JudgeAssessment: """Sample assessment for testing synthesis.""" return JudgeAssessment( sufficient=True, confidence=0.85, reasoning="Evidence is sufficient to synthesize findings on testosterone therapy for HSDD.", recommendation="synthesize", next_search_queries=[], details=AssessmentDetails( mechanism_score=8, mechanism_reasoning="Strong evidence of androgen receptor activation pathway.", clinical_evidence_score=7, clinical_reasoning="Multiple RCTs support efficacy in postmenopausal HSDD.", drug_candidates=["Testosterone", "LibiGel"], key_findings=[ "Testosterone improves libido in postmenopausal women", "Transdermal formulation has best safety profile", ], ), ) @pytest.mark.unit class TestGenerateSynthesis: """Tests for _generate_synthesis method.""" @pytest.mark.asyncio async def test_calls_llm_for_narrative( self, sample_evidence: list[Evidence], sample_assessment: JudgeAssessment, ) -> None: """Synthesis should make an LLM call using pydantic_ai when judge is paid tier.""" mock_search = MagicMock() # Paid tier JudgeHandler has 'assess' but NOT 'synthesize' mock_judge = MagicMock(spec=["assess"]) orchestrator = Orchestrator( search_handler=mock_search, judge_handler=mock_judge, ) orchestrator.history = [{"iteration": 1}] # Needed for footer with ( patch("pydantic_ai.Agent") as mock_agent_class, patch("src.agent_factory.judges.get_model") as mock_get_model, ): mock_model = MagicMock() mock_get_model.return_value = mock_model mock_agent = MagicMock() mock_result = MagicMock() mock_result.output = """### Executive Summary Testosterone therapy demonstrates consistent efficacy for HSDD treatment. ### Background HSDD affects many postmenopausal women. ### Evidence Synthesis Studies show significant improvement in sexual desire scores. ### Recommendations 1. Consider testosterone therapy for postmenopausal HSDD ### Limitations Long-term safety data is limited. ### References 1. Smith J et al. (2023). Testosterone and Female Sexual Desire.""" mock_agent.run = AsyncMock(return_value=mock_result) mock_agent_class.return_value = mock_agent result = await orchestrator._generate_synthesis( query="testosterone HSDD", evidence=sample_evidence, assessment=sample_assessment, ) # Verify LLM agent was created and called mock_agent_class.assert_called_once() mock_agent.run.assert_called_once() # Verify output includes narrative content assert "Executive Summary" in result assert "Background" in result assert "Evidence Synthesis" in result @pytest.mark.asyncio async def test_uses_free_tier_synthesis_when_available( self, sample_evidence: list[Evidence], sample_assessment: JudgeAssessment, ) -> None: """Synthesis should use judge's synthesize method when in Free Tier.""" mock_search = MagicMock() # Free tier JudgeHandler has 'synthesize' method mock_judge = MagicMock() # Setup synthesize method mock_judge.synthesize = AsyncMock(return_value="Free tier narrative content.") orchestrator = Orchestrator( search_handler=mock_search, judge_handler=mock_judge, ) orchestrator.history = [{"iteration": 1}] # We don't need to patch Agent or get_model because they shouldn't be called result = await orchestrator._generate_synthesis( query="test query", evidence=sample_evidence, assessment=sample_assessment, ) # Verify judge's synthesize was called mock_judge.synthesize.assert_called_once() # Verify result contains the free tier content assert "Free tier narrative content" in result # Should still include footer assert "Full Citation List" in result @pytest.mark.asyncio async def test_falls_back_on_llm_error_with_notice( self, sample_evidence: list[Evidence], sample_assessment: JudgeAssessment, ) -> None: """Synthesis should fall back to template if LLM fails, WITH error notice.""" mock_search = MagicMock() # Paid tier simulation mock_judge = MagicMock(spec=["assess"]) orchestrator = Orchestrator( search_handler=mock_search, judge_handler=mock_judge, ) orchestrator.history = [{"iteration": 1}] with patch("pydantic_ai.Agent") as mock_agent_class: # Simulate LLM failure mock_agent_class.side_effect = Exception("LLM unavailable") result = await orchestrator._generate_synthesis( query="testosterone HSDD", evidence=sample_evidence, assessment=sample_assessment, ) # Should surface error to user (MS Agent Framework pattern) assert "AI narrative synthesis unavailable" in result assert "Error" in result # Should still include template content assert "Assessment" in result or "Drug Candidates" in result assert "Testosterone" in result # Drug candidate should be present @pytest.mark.asyncio async def test_includes_citation_footer( self, sample_evidence: list[Evidence], sample_assessment: JudgeAssessment, ) -> None: """Synthesis should include full citation list footer.""" mock_search = MagicMock() # Paid tier simulation mock_judge = MagicMock(spec=["assess"]) orchestrator = Orchestrator( search_handler=mock_search, judge_handler=mock_judge, ) orchestrator.history = [{"iteration": 1}] with ( patch("pydantic_ai.Agent") as mock_agent_class, patch("src.agent_factory.judges.get_model"), ): mock_agent = MagicMock() mock_result = MagicMock() mock_result.output = "Narrative synthesis content." mock_agent.run = AsyncMock(return_value=mock_result) mock_agent_class.return_value = mock_agent result = await orchestrator._generate_synthesis( query="test query", evidence=sample_evidence, assessment=sample_assessment, ) # Should include citation footer assert "Full Citation List" in result assert "pubmed.ncbi.nlm.nih.gov/12345" in result assert "pubmed.ncbi.nlm.nih.gov/67890" in result @pytest.mark.unit class TestGenerateTemplateSynthesis: """Tests for _generate_template_synthesis fallback method.""" def test_returns_structured_output( self, sample_evidence: list[Evidence], sample_assessment: JudgeAssessment, ) -> None: """Template synthesis should return structured markdown.""" mock_search = MagicMock() mock_judge = MagicMock() orchestrator = Orchestrator( search_handler=mock_search, judge_handler=mock_judge, ) orchestrator.history = [{"iteration": 1}] result = orchestrator._generate_template_synthesis( query="testosterone HSDD", evidence=sample_evidence, assessment=sample_assessment, ) # Should have all required sections assert "Question" in result assert "Drug Candidates" in result assert "Key Findings" in result assert "Assessment" in result assert "Citations" in result def test_includes_drug_candidates( self, sample_evidence: list[Evidence], sample_assessment: JudgeAssessment, ) -> None: """Template synthesis should list drug candidates.""" mock_search = MagicMock() mock_judge = MagicMock() orchestrator = Orchestrator( search_handler=mock_search, judge_handler=mock_judge, ) orchestrator.history = [{"iteration": 1}] result = orchestrator._generate_template_synthesis( query="test", evidence=sample_evidence, assessment=sample_assessment, ) assert "Testosterone" in result assert "LibiGel" in result def test_includes_scores( self, sample_evidence: list[Evidence], sample_assessment: JudgeAssessment, ) -> None: """Template synthesis should include assessment scores.""" mock_search = MagicMock() mock_judge = MagicMock() orchestrator = Orchestrator( search_handler=mock_search, judge_handler=mock_judge, ) orchestrator.history = [{"iteration": 1}] result = orchestrator._generate_template_synthesis( query="test", evidence=sample_evidence, assessment=sample_assessment, ) assert "8/10" in result # Mechanism score assert "7/10" in result # Clinical score assert "85%" in result # Confidence