# DeepBoner/tests/unit/orchestrators/test_simple_synthesis.py
# VibecoderMcSwaggins
# feat: Implement Free Tier synthesis using HuggingFace Inference
# e18ea9a
"""Tests for simple orchestrator LLM synthesis."""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.orchestrators.simple import Orchestrator
from src.utils.models import AssessmentDetails, Citation, Evidence, JudgeAssessment
@pytest.fixture
def sample_evidence() -> list[Evidence]:
    """Provide two PubMed-sourced evidence items for the synthesis tests."""
    efficacy_citation = Citation(
        source="pubmed",
        title="Testosterone and Female Sexual Desire",
        url="https://pubmed.ncbi.nlm.nih.gov/12345/",
        date="2023",
        authors=["Smith J", "Jones A"],
    )
    meta_analysis_citation = Citation(
        source="pubmed",
        title="Meta-analysis of Testosterone Therapy",
        url="https://pubmed.ncbi.nlm.nih.gov/67890/",
        date="2024",
        authors=["Johnson B"],
    )

    # Order matters to the reader only: single-study item first, meta-analysis second.
    efficacy = Evidence(
        content="Testosterone therapy demonstrates efficacy in treating HSDD.",
        citation=efficacy_citation,
    )
    meta_analysis = Evidence(
        content="A meta-analysis of 8 RCTs shows significant improvement in sexual desire.",
        citation=meta_analysis_citation,
    )
    return [efficacy, meta_analysis]
@pytest.fixture
def sample_assessment() -> JudgeAssessment:
    """Provide a 'sufficient, synthesize' judge assessment for the synthesis tests."""
    # Scores and confidence here are what the template tests look for
    # ("8/10", "7/10", "85%"), so keep them in sync with those assertions.
    details = AssessmentDetails(
        mechanism_score=8,
        mechanism_reasoning="Strong evidence of androgen receptor activation pathway.",
        clinical_evidence_score=7,
        clinical_reasoning="Multiple RCTs support efficacy in postmenopausal HSDD.",
        drug_candidates=["Testosterone", "LibiGel"],
        key_findings=[
            "Testosterone improves libido in postmenopausal women",
            "Transdermal formulation has best safety profile",
        ],
    )
    assessment = JudgeAssessment(
        sufficient=True,
        confidence=0.85,
        reasoning="Evidence is sufficient to synthesize findings on testosterone therapy for HSDD.",
        recommendation="synthesize",
        next_search_queries=[],
        details=details,
    )
    return assessment
@pytest.mark.unit
class TestGenerateSynthesis:
    """Tests for _generate_synthesis method.

    Two judge shapes are simulated with mocks:

    * "paid tier": ``MagicMock(spec=["assess"])`` - exposes ``assess`` only,
      so any access to ``synthesize`` raises ``AttributeError``.
    * "free tier": a plain ``MagicMock`` whose ``synthesize`` is an
      ``AsyncMock``.
    """

    @staticmethod
    def _make_orchestrator(judge: MagicMock) -> Orchestrator:
        """Return an Orchestrator wired to a mock search handler and ``judge``.

        Args:
            judge: The mock judge handler to inject.

        Returns:
            An Orchestrator whose ``history`` holds a single iteration entry.
        """
        orchestrator = Orchestrator(
            search_handler=MagicMock(),
            judge_handler=judge,
        )
        orchestrator.history = [{"iteration": 1}]  # Needed for footer
        return orchestrator

    @pytest.mark.asyncio
    async def test_calls_llm_for_narrative(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Synthesis should make an LLM call using pydantic_ai when judge is paid tier."""
        # Paid tier JudgeHandler has 'assess' but NOT 'synthesize'
        orchestrator = self._make_orchestrator(MagicMock(spec=["assess"]))

        with (
            patch("pydantic_ai.Agent") as mock_agent_class,
            patch("src.agent_factory.judges.get_model") as mock_get_model,
        ):
            mock_get_model.return_value = MagicMock()

            mock_result = MagicMock()
            mock_result.output = """### Executive Summary
Testosterone therapy demonstrates consistent efficacy for HSDD treatment.
### Background
HSDD affects many postmenopausal women.
### Evidence Synthesis
Studies show significant improvement in sexual desire scores.
### Recommendations
1. Consider testosterone therapy for postmenopausal HSDD
### Limitations
Long-term safety data is limited.
### References
1. Smith J et al. (2023). Testosterone and Female Sexual Desire."""

            mock_agent = MagicMock()
            mock_agent.run = AsyncMock(return_value=mock_result)
            mock_agent_class.return_value = mock_agent

            result = await orchestrator._generate_synthesis(
                query="testosterone HSDD",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

        # Verify LLM agent was created and its coroutine was actually awaited
        # (assert_called_once alone would pass for an un-awaited coroutine).
        mock_agent_class.assert_called_once()
        mock_agent.run.assert_awaited_once()

        # Verify output includes narrative content
        assert "Executive Summary" in result
        assert "Background" in result
        assert "Evidence Synthesis" in result

    @pytest.mark.asyncio
    async def test_uses_free_tier_synthesis_when_available(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Synthesis should use judge's synthesize method when in Free Tier."""
        # Free tier JudgeHandler has 'synthesize' method
        mock_judge = MagicMock()
        mock_judge.synthesize = AsyncMock(return_value="Free tier narrative content.")
        orchestrator = self._make_orchestrator(mock_judge)

        # Agent is patched only to prove it is NOT used on this path; without
        # the patch a regression could silently reach for a real LLM client.
        with patch("pydantic_ai.Agent") as mock_agent_class:
            result = await orchestrator._generate_synthesis(
                query="test query",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

        # Verify judge's synthesize was awaited and the paid path was skipped
        mock_judge.synthesize.assert_awaited_once()
        mock_agent_class.assert_not_called()

        # Verify result contains the free tier content
        assert "Free tier narrative content" in result
        # Should still include footer
        assert "Full Citation List" in result

    @pytest.mark.asyncio
    async def test_falls_back_on_llm_error_with_notice(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Synthesis should fall back to template if LLM fails, WITH error notice."""
        # Paid tier simulation
        orchestrator = self._make_orchestrator(MagicMock(spec=["assess"]))

        with (
            patch("pydantic_ai.Agent") as mock_agent_class,
            # Patched like the sibling tests so the outcome does not depend on
            # whatever model configuration the environment happens to have.
            patch("src.agent_factory.judges.get_model"),
        ):
            # Simulate LLM failure
            mock_agent_class.side_effect = Exception("LLM unavailable")

            result = await orchestrator._generate_synthesis(
                query="testosterone HSDD",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

        # The simulated failure must be the one that triggered the fallback.
        assert mock_agent_class.called

        # Should surface error to user (MS Agent Framework pattern)
        assert "AI narrative synthesis unavailable" in result
        assert "Error" in result

        # Should still include template content
        assert "Assessment" in result or "Drug Candidates" in result
        assert "Testosterone" in result  # Drug candidate should be present

    @pytest.mark.asyncio
    async def test_includes_citation_footer(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Synthesis should include full citation list footer."""
        # Paid tier simulation
        orchestrator = self._make_orchestrator(MagicMock(spec=["assess"]))

        with (
            patch("pydantic_ai.Agent") as mock_agent_class,
            patch("src.agent_factory.judges.get_model"),
        ):
            mock_result = MagicMock()
            mock_result.output = "Narrative synthesis content."

            mock_agent = MagicMock()
            mock_agent.run = AsyncMock(return_value=mock_result)
            mock_agent_class.return_value = mock_agent

            result = await orchestrator._generate_synthesis(
                query="test query",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

        # Should include citation footer with both fixture URLs
        assert "Full Citation List" in result
        assert "pubmed.ncbi.nlm.nih.gov/12345" in result
        assert "pubmed.ncbi.nlm.nih.gov/67890" in result
@pytest.mark.unit
class TestGenerateTemplateSynthesis:
    """Tests for _generate_template_synthesis fallback method."""

    def _render_template(self, query, evidence, assessment):
        """Build an orchestrator on mock handlers and return its template synthesis."""
        orchestrator = Orchestrator(
            search_handler=MagicMock(),
            judge_handler=MagicMock(),
        )
        orchestrator.history = [{"iteration": 1}]
        return orchestrator._generate_template_synthesis(
            query=query,
            evidence=evidence,
            assessment=assessment,
        )

    def test_returns_structured_output(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Template synthesis should return structured markdown."""
        result = self._render_template(
            "testosterone HSDD", sample_evidence, sample_assessment
        )

        # Should have all required sections
        required_sections = (
            "Question",
            "Drug Candidates",
            "Key Findings",
            "Assessment",
            "Citations",
        )
        for section in required_sections:
            assert section in result

    def test_includes_drug_candidates(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Template synthesis should list drug candidates."""
        result = self._render_template("test", sample_evidence, sample_assessment)

        for candidate in ("Testosterone", "LibiGel"):
            assert candidate in result

    def test_includes_scores(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Template synthesis should include assessment scores."""
        result = self._render_template("test", sample_evidence, sample_assessment)

        assert "8/10" in result  # Mechanism score
        assert "7/10" in result  # Clinical score
        assert "85%" in result  # Confidence