Spaces:
Running
Running
File size: 10,753 Bytes
89f1173 e18ea9a 89f1173 e18ea9a 89f1173 e18ea9a 89f1173 ad823e0 89f1173 ad823e0 89f1173 e18ea9a 89f1173 ad823e0 89f1173 e18ea9a 89f1173 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 |
"""Tests for simple orchestrator LLM synthesis."""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.orchestrators.simple import Orchestrator
from src.utils.models import AssessmentDetails, Citation, Evidence, JudgeAssessment
@pytest.fixture
def sample_evidence() -> list[Evidence]:
    """Provide two evidence items, each citing a ``pubmed`` source.

    The first item is a single-study efficacy statement (2023, two authors);
    the second is a meta-analysis statement (2024, one author).  The citation
    URLs are what the footer tests look for in the synthesized output.
    """
    efficacy_citation = Citation(
        source="pubmed",
        title="Testosterone and Female Sexual Desire",
        url="https://pubmed.ncbi.nlm.nih.gov/12345/",
        date="2023",
        authors=["Smith J", "Jones A"],
    )
    efficacy_evidence = Evidence(
        content="Testosterone therapy demonstrates efficacy in treating HSDD.",
        citation=efficacy_citation,
    )

    meta_analysis_citation = Citation(
        source="pubmed",
        title="Meta-analysis of Testosterone Therapy",
        url="https://pubmed.ncbi.nlm.nih.gov/67890/",
        date="2024",
        authors=["Johnson B"],
    )
    meta_analysis_evidence = Evidence(
        content="A meta-analysis of 8 RCTs shows significant improvement in sexual desire.",
        citation=meta_analysis_citation,
    )

    return [efficacy_evidence, meta_analysis_evidence]
@pytest.fixture
def sample_assessment() -> JudgeAssessment:
    """Provide a "sufficient" judge assessment that recommends synthesis.

    Carries the values the template-synthesis tests look for in the rendered
    output: mechanism score 8, clinical evidence score 7, confidence 0.85 and
    the drug candidates "Testosterone" and "LibiGel".
    """
    details = AssessmentDetails(
        mechanism_score=8,
        mechanism_reasoning="Strong evidence of androgen receptor activation pathway.",
        clinical_evidence_score=7,
        clinical_reasoning="Multiple RCTs support efficacy in postmenopausal HSDD.",
        drug_candidates=["Testosterone", "LibiGel"],
        key_findings=[
            "Testosterone improves libido in postmenopausal women",
            "Transdermal formulation has best safety profile",
        ],
    )
    assessment = JudgeAssessment(
        sufficient=True,
        confidence=0.85,
        reasoning="Evidence is sufficient to synthesize findings on testosterone therapy for HSDD.",
        recommendation="synthesize",
        next_search_queries=[],
        details=details,
    )
    return assessment
@pytest.mark.unit
class TestGenerateSynthesis:
    """Tests for the ``Orchestrator._generate_synthesis`` method.

    Two judge shapes are simulated:

    * "Paid tier": ``MagicMock(spec=["assess"])`` exposes ``assess`` but has no
      ``synthesize`` attribute, so synthesis is expected to go through
      ``pydantic_ai.Agent``.
    * "Free tier": a plain ``MagicMock`` whose ``synthesize`` is an
      ``AsyncMock``, so synthesis is expected to be delegated to the judge.
    """

    @staticmethod
    def _build_orchestrator(judge: MagicMock) -> Orchestrator:
        """Return an Orchestrator wired to a mock search handler and *judge*."""
        orchestrator = Orchestrator(
            search_handler=MagicMock(),
            judge_handler=judge,
        )
        orchestrator.history = [{"iteration": 1}]  # Needed for footer
        return orchestrator

    @pytest.mark.asyncio
    async def test_calls_llm_for_narrative(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Synthesis should make an LLM call using pydantic_ai when judge is paid tier."""
        # Paid tier JudgeHandler has 'assess' but NOT 'synthesize'
        orchestrator = self._build_orchestrator(MagicMock(spec=["assess"]))

        with (
            patch("pydantic_ai.Agent") as mock_agent_class,
            patch("src.agent_factory.judges.get_model") as mock_get_model,
        ):
            mock_get_model.return_value = MagicMock()

            mock_result = MagicMock()
            mock_result.output = """### Executive Summary
Testosterone therapy demonstrates consistent efficacy for HSDD treatment.
### Background
HSDD affects many postmenopausal women.
### Evidence Synthesis
Studies show significant improvement in sexual desire scores.
### Recommendations
1. Consider testosterone therapy for postmenopausal HSDD
### Limitations
Long-term safety data is limited.
### References
1. Smith J et al. (2023). Testosterone and Female Sexual Desire."""

            mock_agent = MagicMock()
            mock_agent.run = AsyncMock(return_value=mock_result)
            mock_agent_class.return_value = mock_agent

            result = await orchestrator._generate_synthesis(
                query="testosterone HSDD",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

            # Verify LLM agent was created and its coroutine actually awaited.
            # assert_called_once() alone would pass for an un-awaited AsyncMock.
            mock_agent_class.assert_called_once()
            mock_agent.run.assert_awaited_once()

        # Verify output includes narrative content
        assert "Executive Summary" in result
        assert "Background" in result
        assert "Evidence Synthesis" in result

    @pytest.mark.asyncio
    async def test_uses_free_tier_synthesis_when_available(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Synthesis should use judge's synthesize method when in Free Tier."""
        # Free tier JudgeHandler has 'synthesize' method
        mock_judge = MagicMock()
        mock_judge.synthesize = AsyncMock(return_value="Free tier narrative content.")
        orchestrator = self._build_orchestrator(mock_judge)

        # The paid-tier Agent must not be used on this path.  Patch it so that
        # expectation is actually checked, and so a regression cannot reach
        # the real pydantic_ai.Agent from a unit test.
        with patch("pydantic_ai.Agent") as mock_agent_class:
            result = await orchestrator._generate_synthesis(
                query="test query",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )
            mock_agent_class.assert_not_called()

        # Verify judge's synthesize was awaited (not merely called)
        mock_judge.synthesize.assert_awaited_once()
        # Verify result contains the free tier content
        assert "Free tier narrative content" in result
        # Should still include footer
        assert "Full Citation List" in result

    @pytest.mark.asyncio
    async def test_falls_back_on_llm_error_with_notice(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Synthesis should fall back to template if LLM fails, WITH error notice."""
        # Paid tier simulation
        orchestrator = self._build_orchestrator(MagicMock(spec=["assess"]))

        with patch("pydantic_ai.Agent") as mock_agent_class:
            # Simulate LLM failure
            mock_agent_class.side_effect = Exception("LLM unavailable")
            result = await orchestrator._generate_synthesis(
                query="testosterone HSDD",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

        # Should surface error to user (MS Agent Framework pattern)
        assert "AI narrative synthesis unavailable" in result
        assert "Error" in result
        # Should still include template content
        assert "Assessment" in result or "Drug Candidates" in result
        assert "Testosterone" in result  # Drug candidate should be present

    @pytest.mark.asyncio
    async def test_includes_citation_footer(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Synthesis should include full citation list footer."""
        # Paid tier simulation
        orchestrator = self._build_orchestrator(MagicMock(spec=["assess"]))

        with (
            patch("pydantic_ai.Agent") as mock_agent_class,
            patch("src.agent_factory.judges.get_model"),
        ):
            mock_result = MagicMock()
            mock_result.output = "Narrative synthesis content."
            mock_agent = MagicMock()
            mock_agent.run = AsyncMock(return_value=mock_result)
            mock_agent_class.return_value = mock_agent

            result = await orchestrator._generate_synthesis(
                query="test query",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

        # Should include citation footer with both evidence URLs
        assert "Full Citation List" in result
        assert "pubmed.ncbi.nlm.nih.gov/12345" in result
        assert "pubmed.ncbi.nlm.nih.gov/67890" in result
@pytest.mark.unit
class TestGenerateTemplateSynthesis:
    """Tests for the ``_generate_template_synthesis`` fallback method."""

    @staticmethod
    def _render_template(
        query: str,
        evidence: list[Evidence],
        assessment: JudgeAssessment,
    ) -> str:
        """Build an Orchestrator on mock handlers and render the template once."""
        search_double = MagicMock()
        judge_double = MagicMock()
        orchestrator = Orchestrator(
            search_handler=search_double,
            judge_handler=judge_double,
        )
        orchestrator.history = [{"iteration": 1}]
        return orchestrator._generate_template_synthesis(
            query=query,
            evidence=evidence,
            assessment=assessment,
        )

    def test_returns_structured_output(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Template synthesis should return structured markdown."""
        rendered = self._render_template(
            "testosterone HSDD", sample_evidence, sample_assessment
        )

        # Should have all required sections
        assert "Question" in rendered
        assert "Drug Candidates" in rendered
        assert "Key Findings" in rendered
        assert "Assessment" in rendered
        assert "Citations" in rendered

    def test_includes_drug_candidates(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Template synthesis should list drug candidates."""
        rendered = self._render_template("test", sample_evidence, sample_assessment)

        assert "Testosterone" in rendered
        assert "LibiGel" in rendered

    def test_includes_scores(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """Template synthesis should include assessment scores."""
        rendered = self._render_template("test", sample_evidence, sample_assessment)

        assert "8/10" in rendered  # Mechanism score
        assert "7/10" in rendered  # Clinical score
        assert "85%" in rendered  # Confidence
|