File size: 8,319 Bytes
89f1173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
"""Tests for narrative synthesis prompts."""

import pytest

from src.prompts.synthesis import (
    FEW_SHOT_EXAMPLE,
    format_synthesis_prompt,
    get_synthesis_system_prompt,
)


@pytest.mark.unit
class TestSynthesisSystemPrompt:
    """Content checks for the text returned by ``get_synthesis_system_prompt``."""

    def test_system_prompt_emphasizes_prose(self) -> None:
        """The prompt asks for prose paragraphs and rules out bullet points."""
        system_text = get_synthesis_system_prompt()
        # The heading is matched with exact case; the bullet-point wording is
        # matched case-insensitively.
        assert "PROSE PARAGRAPHS" in system_text
        assert "not bullet points" in system_text.lower()

    def test_system_prompt_requires_executive_summary(self) -> None:
        """The prompt names the executive summary and marks something REQUIRED."""
        system_text = get_synthesis_system_prompt()
        for phrase in ("Executive Summary", "REQUIRED"):
            assert phrase in system_text

    def test_system_prompt_requires_background(self) -> None:
        """The prompt mentions a Background section."""
        assert "Background" in get_synthesis_system_prompt()

    def test_system_prompt_requires_evidence_synthesis(self) -> None:
        """The prompt mentions Evidence Synthesis and Mechanism of Action."""
        system_text = get_synthesis_system_prompt()
        for phrase in ("Evidence Synthesis", "Mechanism of Action"):
            assert phrase in system_text

    def test_system_prompt_requires_recommendations(self) -> None:
        """The prompt mentions a Recommendations section."""
        assert "Recommendations" in get_synthesis_system_prompt()

    def test_system_prompt_requires_limitations(self) -> None:
        """The prompt mentions a Limitations section."""
        assert "Limitations" in get_synthesis_system_prompt()

    def test_system_prompt_warns_about_hallucination(self) -> None:
        """The prompt tells the model never to hallucinate (any letter case)."""
        # A case-insensitive match also accepts the "NEVER hallucinate" spelling.
        assert "never hallucinate" in get_synthesis_system_prompt().lower()

    def test_system_prompt_includes_domain_name(self) -> None:
        """Passing ``"sexual_health"`` yields a prompt mentioning "sexual health"."""
        system_text = get_synthesis_system_prompt("sexual_health")
        assert "sexual health" in system_text.lower()


@pytest.mark.unit
class TestFormatSynthesisPrompt:
    """Content checks for the user prompt built by ``format_synthesis_prompt``."""

    @staticmethod
    def _render(
        *,
        query: str = "test query",
        evidence_summary: str = "...",
        drug_candidates: "list[str] | None" = None,
        key_findings: "list[str] | None" = None,
        mechanism_score: int = 5,
        clinical_score: int = 5,
        confidence: float = 0.5,
    ) -> str:
        """Call ``format_synthesis_prompt`` with baseline arguments.

        Each test overrides only the arguments it cares about. Omitted list
        arguments are passed as a fresh empty list on every call.
        """
        return format_synthesis_prompt(
            query=query,
            evidence_summary=evidence_summary,
            drug_candidates=[] if drug_candidates is None else drug_candidates,
            key_findings=[] if key_findings is None else key_findings,
            mechanism_score=mechanism_score,
            clinical_score=clinical_score,
            confidence=confidence,
        )

    def test_includes_query(self) -> None:
        """The research query text appears in the prompt."""
        rendered = self._render(
            query="testosterone libido",
            evidence_summary="Study shows efficacy...",
            drug_candidates=["Testosterone"],
            key_findings=["Improved libido"],
            mechanism_score=8,
            clinical_score=7,
            confidence=0.85,
        )
        assert "testosterone libido" in rendered

    def test_includes_evidence_summary(self) -> None:
        """Text from the evidence summary appears in the prompt."""
        rendered = self._render(
            evidence_summary="Study by Smith et al. shows significant results...",
        )
        assert "Study by Smith et al." in rendered

    def test_includes_drug_candidates(self) -> None:
        """Every drug candidate name appears in the prompt."""
        candidates = ["Testosterone", "Flibanserin"]
        rendered = self._render(drug_candidates=candidates)
        for name in ("Testosterone", "Flibanserin"):
            assert name in rendered

    def test_includes_key_findings(self) -> None:
        """Every key finding appears in the prompt."""
        rendered = self._render(
            key_findings=["Improved libido in postmenopausal women", "Safe profile"],
        )
        for finding in ("Improved libido in postmenopausal women", "Safe profile"):
            assert finding in rendered

    def test_includes_scores(self) -> None:
        """Scores are shown out of ten and confidence is shown as a percentage."""
        rendered = self._render(mechanism_score=8, clinical_score=7, confidence=0.85)
        for fragment in ("8/10", "7/10", "85%"):
            assert fragment in rendered

    def test_handles_empty_candidates(self) -> None:
        """With no drug candidates the prompt contains "None identified"."""
        assert "None identified" in self._render()

    def test_handles_empty_findings(self) -> None:
        """With no key findings the prompt contains "No specific findings"."""
        assert "No specific findings" in self._render()

    def test_includes_few_shot_example(self) -> None:
        """The prompt contains "Alprostadil", expected from the few-shot example."""
        assert "Alprostadil" in self._render()


@pytest.mark.unit
class TestFewShotExample:
    """Structural checks on the ``FEW_SHOT_EXAMPLE`` constant."""

    def test_few_shot_is_mostly_narrative(self) -> None:
        """Long paragraphs should be at least as numerous as list markers."""
        # A "paragraph" is any blank-line-separated chunk longer than 100 chars.
        chunks = FEW_SHOT_EXAMPLE.split("\n\n")
        prose_count = sum(1 for chunk in chunks if len(chunk) > 100)

        # NOTE(review): only lines starting with "- " or "1. " are counted, so
        # list items numbered 2, 3, ... do not contribute to this total.
        bullet_count = sum(FEW_SHOT_EXAMPLE.count(marker) for marker in ("\n- ", "\n1. "))

        assert prose_count >= bullet_count, "Few-shot example should be mostly narrative prose"

    def test_few_shot_has_executive_summary(self) -> None:
        """The example contains an "Executive Summary" heading text."""
        assert "Executive Summary" in FEW_SHOT_EXAMPLE

    def test_few_shot_has_background(self) -> None:
        """The example contains the text "Background"."""
        assert "Background" in FEW_SHOT_EXAMPLE

    def test_few_shot_has_evidence_synthesis(self) -> None:
        """The example contains "Evidence Synthesis" and "Mechanism of Action"."""
        for phrase in ("Evidence Synthesis", "Mechanism of Action"):
            assert phrase in FEW_SHOT_EXAMPLE

    def test_few_shot_has_recommendations(self) -> None:
        """The example contains the text "Recommendations"."""
        assert "Recommendations" in FEW_SHOT_EXAMPLE

    def test_few_shot_has_limitations(self) -> None:
        """The example contains the text "Limitations"."""
        assert "Limitations" in FEW_SHOT_EXAMPLE

    def test_few_shot_has_references(self) -> None:
        """The example contains "References" and a PubMed host name."""
        for phrase in ("References", "pubmed.ncbi.nlm.nih.gov"):
            assert phrase in FEW_SHOT_EXAMPLE

    def test_few_shot_includes_statistics(self) -> None:
        """The example shows a percentage, a p-value marker and "CI"."""
        assert "%" in FEW_SHOT_EXAMPLE  # Percentages
        # Either "p<" or "p=" is accepted as a p-value marker.
        assert any(marker in FEW_SHOT_EXAMPLE for marker in ("p<", "p="))
        assert "CI" in FEW_SHOT_EXAMPLE  # Confidence intervals