File size: 10,753 Bytes
89f1173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e18ea9a
89f1173
e18ea9a
 
89f1173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e18ea9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89f1173
ad823e0
89f1173
 
 
 
ad823e0
89f1173
e18ea9a
 
89f1173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad823e0
 
 
 
 
89f1173
 
 
 
 
 
 
 
 
 
 
e18ea9a
 
89f1173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
"""Tests for simple orchestrator LLM synthesis."""

from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from src.orchestrators.simple import Orchestrator
from src.utils.models import AssessmentDetails, Citation, Evidence, JudgeAssessment


@pytest.fixture
def sample_evidence() -> list[Evidence]:
    """Provide two PubMed-sourced evidence items used by the synthesis tests.

    Returns:
        A two-element list: a single-study efficacy claim (2023) followed by
        a meta-analysis summary (2024), each carrying its own citation.
    """
    efficacy_study = Evidence(
        content="Testosterone therapy demonstrates efficacy in treating HSDD.",
        citation=Citation(
            source="pubmed",
            title="Testosterone and Female Sexual Desire",
            url="https://pubmed.ncbi.nlm.nih.gov/12345/",
            date="2023",
            authors=["Smith J", "Jones A"],
        ),
    )
    meta_analysis = Evidence(
        content="A meta-analysis of 8 RCTs shows significant improvement in sexual desire.",
        citation=Citation(
            source="pubmed",
            title="Meta-analysis of Testosterone Therapy",
            url="https://pubmed.ncbi.nlm.nih.gov/67890/",
            date="2024",
            authors=["Johnson B"],
        ),
    )
    # Order matters only for readability; tests look the items up by URL/content.
    return [efficacy_study, meta_analysis]


@pytest.fixture
def sample_assessment() -> JudgeAssessment:
    """Provide a judge assessment that marks the evidence as sufficient.

    Returns:
        A ``JudgeAssessment`` with ``sufficient=True``, 0.85 confidence, a
        ``"synthesize"`` recommendation, no follow-up queries, and detailed
        scores (mechanism 8, clinical 7) with two drug candidates and two
        key findings.
    """
    scoring = AssessmentDetails(
        mechanism_score=8,
        mechanism_reasoning="Strong evidence of androgen receptor activation pathway.",
        clinical_evidence_score=7,
        clinical_reasoning="Multiple RCTs support efficacy in postmenopausal HSDD.",
        drug_candidates=["Testosterone", "LibiGel"],
        key_findings=[
            "Testosterone improves libido in postmenopausal women",
            "Transdermal formulation has best safety profile",
        ],
    )
    return JudgeAssessment(
        sufficient=True,
        confidence=0.85,
        reasoning="Evidence is sufficient to synthesize findings on testosterone therapy for HSDD.",
        recommendation="synthesize",
        next_search_queries=[],
        details=scoring,
    )


@pytest.mark.unit
class TestGenerateSynthesis:
    """Exercise ``Orchestrator._generate_synthesis`` with mocked handlers and LLM."""

    @staticmethod
    def _orchestrator_with(judge: MagicMock) -> Orchestrator:
        """Build an Orchestrator around *judge* with a mocked search handler.

        A single history entry is recorded because the generated footer
        needs it.
        """
        instance = Orchestrator(
            search_handler=MagicMock(),
            judge_handler=judge,
        )
        instance.history = [{"iteration": 1}]
        return instance

    @pytest.mark.asyncio
    async def test_calls_llm_for_narrative(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """A paid-tier judge should cause one pydantic_ai Agent call for the narrative."""
        # Paid tier is simulated by a judge exposing 'assess' but NOT 'synthesize'.
        orchestrator = self._orchestrator_with(MagicMock(spec=["assess"]))

        narrative = """### Executive Summary

Testosterone therapy demonstrates consistent efficacy for HSDD treatment.

### Background

HSDD affects many postmenopausal women.

### Evidence Synthesis

Studies show significant improvement in sexual desire scores.

### Recommendations

1. Consider testosterone therapy for postmenopausal HSDD

### Limitations

Long-term safety data is limited.

### References

1. Smith J et al. (2023). Testosterone and Female Sexual Desire."""

        # The agent's awaited run() yields an object whose .output is the narrative.
        llm_agent = MagicMock()
        llm_agent.run = AsyncMock(return_value=MagicMock(output=narrative))

        with (
            patch("pydantic_ai.Agent", return_value=llm_agent) as agent_cls,
            patch("src.agent_factory.judges.get_model", return_value=MagicMock()),
        ):
            report = await orchestrator._generate_synthesis(
                query="testosterone HSDD",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

            # The agent must have been constructed once and run once.
            agent_cls.assert_called_once()
            llm_agent.run.assert_called_once()

            # The narrative headings must survive into the final report.
            for heading in ("Executive Summary", "Background", "Evidence Synthesis"):
                assert heading in report

    @pytest.mark.asyncio
    async def test_uses_free_tier_synthesis_when_available(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """A judge that offers ``synthesize`` (Free Tier) should supply the narrative."""
        # Free tier is simulated by a judge that provides an async 'synthesize'.
        free_tier_judge = MagicMock()
        free_tier_judge.synthesize = AsyncMock(return_value="Free tier narrative content.")
        orchestrator = self._orchestrator_with(free_tier_judge)

        # Agent/get_model are deliberately left unpatched: this path should not use them.
        report = await orchestrator._generate_synthesis(
            query="test query",
            evidence=sample_evidence,
            assessment=sample_assessment,
        )

        free_tier_judge.synthesize.assert_called_once()

        # Judge-provided narrative is present, and the footer is still appended.
        assert "Free tier narrative content" in report
        assert "Full Citation List" in report

    @pytest.mark.asyncio
    async def test_falls_back_on_llm_error_with_notice(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """An LLM failure should yield the template output plus a visible error notice."""
        # Paid tier simulation: no 'synthesize' on the judge.
        orchestrator = self._orchestrator_with(MagicMock(spec=["assess"]))

        # Constructing the Agent raises, which stands in for an unavailable LLM.
        with patch("pydantic_ai.Agent", side_effect=Exception("LLM unavailable")):
            report = await orchestrator._generate_synthesis(
                query="testosterone HSDD",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

            # The failure is surfaced to the user (MS Agent Framework pattern).
            assert "AI narrative synthesis unavailable" in report
            assert "Error" in report

            # Template content is still rendered, including a drug candidate.
            assert any(marker in report for marker in ("Assessment", "Drug Candidates"))
            assert "Testosterone" in report

    @pytest.mark.asyncio
    async def test_includes_citation_footer(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """The synthesized report should end with the full citation list."""
        # Paid tier simulation: no 'synthesize' on the judge.
        orchestrator = self._orchestrator_with(MagicMock(spec=["assess"]))

        llm_agent = MagicMock()
        llm_agent.run = AsyncMock(
            return_value=MagicMock(output="Narrative synthesis content."),
        )

        with (
            patch("pydantic_ai.Agent", return_value=llm_agent),
            patch("src.agent_factory.judges.get_model"),
        ):
            report = await orchestrator._generate_synthesis(
                query="test query",
                evidence=sample_evidence,
                assessment=sample_assessment,
            )

            # Footer heading plus both fixture URLs must appear.
            assert "Full Citation List" in report
            for url_fragment in (
                "pubmed.ncbi.nlm.nih.gov/12345",
                "pubmed.ncbi.nlm.nih.gov/67890",
            ):
                assert url_fragment in report


@pytest.mark.unit
class TestGenerateTemplateSynthesis:
    """Exercise the ``Orchestrator._generate_template_synthesis`` fallback."""

    @staticmethod
    def _render_template(
        query: str,
        evidence: list[Evidence],
        assessment: JudgeAssessment,
    ) -> str:
        """Run template synthesis on a freshly built, fully mocked Orchestrator.

        Both handlers are plain mocks and a single history entry is set
        before the template is rendered.
        """
        instance = Orchestrator(
            search_handler=MagicMock(),
            judge_handler=MagicMock(),
        )
        instance.history = [{"iteration": 1}]
        return instance._generate_template_synthesis(
            query=query,
            evidence=evidence,
            assessment=assessment,
        )

    def test_returns_structured_output(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """The template output should contain every expected section heading."""
        rendered = self._render_template(
            "testosterone HSDD",
            sample_evidence,
            sample_assessment,
        )

        required_sections = (
            "Question",
            "Drug Candidates",
            "Key Findings",
            "Assessment",
            "Citations",
        )
        for section in required_sections:
            assert section in rendered

    def test_includes_drug_candidates(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """The template output should name each drug candidate from the assessment."""
        rendered = self._render_template("test", sample_evidence, sample_assessment)

        for candidate in ("Testosterone", "LibiGel"):
            assert candidate in rendered

    def test_includes_scores(
        self,
        sample_evidence: list[Evidence],
        sample_assessment: JudgeAssessment,
    ) -> None:
        """The template output should show the mechanism, clinical and confidence figures."""
        rendered = self._render_template("test", sample_evidence, sample_assessment)

        expected_figures = (
            "8/10",  # Mechanism score
            "7/10",  # Clinical score
            "85%",  # Confidence
        )
        for figure in expected_figures:
            assert figure in rendered