|
|
"""Tests for simple orchestrator LLM synthesis.""" |
|
|
|
|
|
from unittest.mock import AsyncMock, MagicMock, patch |
|
|
|
|
|
import pytest |
|
|
|
|
|
from src.orchestrators.simple import Orchestrator |
|
|
from src.utils.models import AssessmentDetails, Citation, Evidence, JudgeAssessment |
|
|
|
|
|
|
|
|
@pytest.fixture
def sample_evidence() -> list[Evidence]:
    """Provide two PubMed-sourced evidence items to feed into synthesis."""
    # First item: a single efficacy statement with two listed authors.
    efficacy_citation = Citation(
        source="pubmed",
        title="Testosterone and Female Sexual Desire",
        url="https://pubmed.ncbi.nlm.nih.gov/12345/",
        date="2023",
        authors=["Smith J", "Jones A"],
    )
    efficacy_item = Evidence(
        content="Testosterone therapy demonstrates efficacy in treating HSDD.",
        citation=efficacy_citation,
    )

    # Second item: a meta-analysis statement with a single listed author.
    meta_citation = Citation(
        source="pubmed",
        title="Meta-analysis of Testosterone Therapy",
        url="https://pubmed.ncbi.nlm.nih.gov/67890/",
        date="2024",
        authors=["Johnson B"],
    )
    meta_item = Evidence(
        content="A meta-analysis of 8 RCTs shows significant improvement in sexual desire.",
        citation=meta_citation,
    )

    # Order matters to callers that index or enumerate the citations.
    return [efficacy_item, meta_item]
|
|
|
|
|
|
|
|
@pytest.fixture
def sample_assessment() -> JudgeAssessment:
    """Provide a 'sufficient' judge assessment that recommends synthesis."""
    # Scores, candidates and findings that the synthesis tests look for
    # in the rendered output.
    findings = [
        "Testosterone improves libido in postmenopausal women",
        "Transdermal formulation has best safety profile",
    ]
    scoring = AssessmentDetails(
        mechanism_score=8,
        mechanism_reasoning="Strong evidence of androgen receptor activation pathway.",
        clinical_evidence_score=7,
        clinical_reasoning="Multiple RCTs support efficacy in postmenopausal HSDD.",
        drug_candidates=["Testosterone", "LibiGel"],
        key_findings=findings,
    )

    verdict = JudgeAssessment(
        sufficient=True,
        confidence=0.85,
        reasoning="Evidence is sufficient to synthesize findings on testosterone therapy for HSDD.",
        recommendation="synthesize",
        next_search_queries=[],
        details=scoring,
    )
    return verdict
|
|
|
|
|
|
|
|
@pytest.mark.unit
class TestGenerateSynthesis:
    """Tests for _generate_synthesis method.

    Two judge shapes are used throughout:

    * ``MagicMock(spec=["assess"])`` -- a judge that exposes only ``assess``
      (no ``synthesize`` attribute); the tests treat this as the paid tier.
    * ``MagicMock()`` with an ``AsyncMock`` ``synthesize`` -- the free tier.
    """

    @staticmethod
    def _make_orchestrator(judge: MagicMock) -> Orchestrator:
        """Build an Orchestrator around ``judge`` with one recorded iteration.

        Args:
            judge: Mock used as the orchestrator's ``judge_handler``.

        Returns:
            An Orchestrator with a mocked search handler and a one-entry
            ``history``.
        """
        orchestrator = Orchestrator(
            search_handler=MagicMock(),
            judge_handler=judge,
        )
        orchestrator.history = [{"iteration": 1}]
        return orchestrator

    @pytest.mark.asyncio
    async def test_calls_llm_for_narrative(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Synthesis should make an LLM call using pydantic_ai when judge is paid tier."""
        # spec=["assess"] leaves the judge without a ``synthesize`` attribute.
        orchestrator = self._make_orchestrator(MagicMock(spec=["assess"]))

        with (
            patch("pydantic_ai.Agent") as mock_agent_class,
            patch("src.agent_factory.judges.get_model") as mock_get_model,
        ):
            mock_get_model.return_value = MagicMock()

            mock_agent = MagicMock()
            mock_result = MagicMock()
            mock_result.output = """### Executive Summary

Testosterone therapy demonstrates consistent efficacy for HSDD treatment.

### Background

HSDD affects many postmenopausal women.

### Evidence Synthesis

Studies show significant improvement in sexual desire scores.

### Recommendations

1. Consider testosterone therapy for postmenopausal HSDD

### Limitations

Long-term safety data is limited.

### References

1. Smith J et al. (2023). Testosterone and Female Sexual Desire."""

            mock_agent.run = AsyncMock(return_value=mock_result)
            mock_agent_class.return_value = mock_agent

            result = await orchestrator._generate_synthesis(
                query="testosterone HSDD",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

            # The agent must be constructed once and its coroutine actually
            # awaited; assert_called_once() alone would also pass for a
            # coroutine that was created but never awaited.
            mock_agent_class.assert_called_once()
            mock_agent.run.assert_awaited_once()

            # The LLM narrative must be carried through to the output.
            assert "Executive Summary" in result
            assert "Background" in result
            assert "Evidence Synthesis" in result

    @pytest.mark.asyncio
    async def test_uses_free_tier_synthesis_when_available(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Synthesis should use judge's synthesize method when in Free Tier."""
        mock_judge = MagicMock()
        mock_judge.synthesize = AsyncMock(return_value="Free tier narrative content.")

        orchestrator = self._make_orchestrator(mock_judge)

        result = await orchestrator._generate_synthesis(
            query="test query",
            evidence=sample_evidence,
            assessment=sample_assessment,
        )

        # Verify the judge's coroutine was awaited, not merely called.
        mock_judge.synthesize.assert_awaited_once()

        # The judge's narrative is included ...
        assert "Free tier narrative content" in result

        # ... and the citation footer is still appended.
        assert "Full Citation List" in result

    @pytest.mark.asyncio
    async def test_falls_back_on_llm_error_with_notice(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Synthesis should fall back to template if LLM fails, WITH error notice."""
        orchestrator = self._make_orchestrator(MagicMock(spec=["assess"]))

        # get_model is patched as in the sibling paid-tier tests so that the
        # failure comes from the Agent constructor below rather than from
        # whatever model resolution does in the current environment.
        with (
            patch("pydantic_ai.Agent") as mock_agent_class,
            patch("src.agent_factory.judges.get_model"),
        ):
            mock_agent_class.side_effect = Exception("LLM unavailable")

            result = await orchestrator._generate_synthesis(
                query="testosterone HSDD",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

        # The fallback must announce that the narrative is missing.
        assert "AI narrative synthesis unavailable" in result
        assert "Error" in result

        # Template content must still be present.
        assert "Assessment" in result or "Drug Candidates" in result
        assert "Testosterone" in result

    @pytest.mark.asyncio
    async def test_includes_citation_footer(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Synthesis should include full citation list footer."""
        orchestrator = self._make_orchestrator(MagicMock(spec=["assess"]))

        with (
            patch("pydantic_ai.Agent") as mock_agent_class,
            patch("src.agent_factory.judges.get_model"),
        ):
            mock_agent = MagicMock()
            mock_result = MagicMock()
            mock_result.output = "Narrative synthesis content."
            mock_agent.run = AsyncMock(return_value=mock_result)
            mock_agent_class.return_value = mock_agent

            result = await orchestrator._generate_synthesis(
                query="test query",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

        # Both fixture URLs must appear under the footer heading.
        assert "Full Citation List" in result
        assert "pubmed.ncbi.nlm.nih.gov/12345" in result
        assert "pubmed.ncbi.nlm.nih.gov/67890" in result
|
|
|
|
|
|
|
|
@pytest.mark.unit
class TestGenerateTemplateSynthesis:
    """Tests for the _generate_template_synthesis fallback method."""

    @staticmethod
    def _render(
        query: str,
        evidence: list[Evidence],
        assessment: JudgeAssessment,
    ) -> str:
        """Run the template synthesis on a freshly built orchestrator.

        Both handlers are plain mocks and ``history`` holds a single
        iteration entry, which is the setup every test in this class uses.
        """
        subject = Orchestrator(
            search_handler=MagicMock(),
            judge_handler=MagicMock(),
        )
        subject.history = [{"iteration": 1}]
        return subject._generate_template_synthesis(
            query=query,
            evidence=evidence,
            assessment=assessment,
        )

    def test_returns_structured_output(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Template synthesis should return structured markdown."""
        rendered = self._render("testosterone HSDD", sample_evidence, sample_assessment)

        # Every expected section label must appear in the output.
        for heading in (
            "Question",
            "Drug Candidates",
            "Key Findings",
            "Assessment",
            "Citations",
        ):
            assert heading in rendered

    def test_includes_drug_candidates(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Template synthesis should list drug candidates."""
        rendered = self._render("test", sample_evidence, sample_assessment)

        for candidate in ("Testosterone", "LibiGel"):
            assert candidate in rendered

    def test_includes_scores(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Template synthesis should include assessment scores."""
        rendered = self._render("test", sample_evidence, sample_assessment)

        # Mechanism score, clinical score and confidence from the fixture.
        for score_text in ("8/10", "7/10", "85%"):
            assert score_text in rendered
|
|
|