Spaces:
Running
Running
File size: 8,319 Bytes
89f1173 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 |
"""Tests for narrative synthesis prompts."""
import pytest
from src.prompts.synthesis import (
FEW_SHOT_EXAMPLE,
format_synthesis_prompt,
get_synthesis_system_prompt,
)
@pytest.mark.unit
class TestSynthesisSystemPrompt:
"""Tests for synthesis system prompt generation."""
def test_system_prompt_emphasizes_prose(self) -> None:
"""System prompt should emphasize prose paragraphs, not bullets."""
prompt = get_synthesis_system_prompt()
assert "PROSE PARAGRAPHS" in prompt
assert "not bullet points" in prompt.lower()
def test_system_prompt_requires_executive_summary(self) -> None:
"""System prompt should require executive summary section."""
prompt = get_synthesis_system_prompt()
assert "Executive Summary" in prompt
assert "REQUIRED" in prompt
def test_system_prompt_requires_background(self) -> None:
"""System prompt should require background section."""
prompt = get_synthesis_system_prompt()
assert "Background" in prompt
def test_system_prompt_requires_evidence_synthesis(self) -> None:
"""System prompt should require evidence synthesis section."""
prompt = get_synthesis_system_prompt()
assert "Evidence Synthesis" in prompt
assert "Mechanism of Action" in prompt
def test_system_prompt_requires_recommendations(self) -> None:
"""System prompt should require recommendations section."""
prompt = get_synthesis_system_prompt()
assert "Recommendations" in prompt
def test_system_prompt_requires_limitations(self) -> None:
"""System prompt should require limitations section."""
prompt = get_synthesis_system_prompt()
assert "Limitations" in prompt
def test_system_prompt_warns_about_hallucination(self) -> None:
"""System prompt should warn about citation hallucination."""
prompt = get_synthesis_system_prompt()
assert "NEVER hallucinate" in prompt or "never hallucinate" in prompt.lower()
def test_system_prompt_includes_domain_name(self) -> None:
"""System prompt should include domain name."""
prompt = get_synthesis_system_prompt("sexual_health")
assert "sexual health" in prompt.lower()
@pytest.mark.unit
class TestFormatSynthesisPrompt:
"""Tests for synthesis user prompt formatting."""
def test_includes_query(self) -> None:
"""User prompt should include the research query."""
prompt = format_synthesis_prompt(
query="testosterone libido",
evidence_summary="Study shows efficacy...",
drug_candidates=["Testosterone"],
key_findings=["Improved libido"],
mechanism_score=8,
clinical_score=7,
confidence=0.85,
)
assert "testosterone libido" in prompt
def test_includes_evidence_summary(self) -> None:
"""User prompt should include evidence summary."""
prompt = format_synthesis_prompt(
query="test query",
evidence_summary="Study by Smith et al. shows significant results...",
drug_candidates=[],
key_findings=[],
mechanism_score=5,
clinical_score=5,
confidence=0.5,
)
assert "Study by Smith et al." in prompt
def test_includes_drug_candidates(self) -> None:
"""User prompt should include drug candidates."""
prompt = format_synthesis_prompt(
query="test query",
evidence_summary="...",
drug_candidates=["Testosterone", "Flibanserin"],
key_findings=[],
mechanism_score=5,
clinical_score=5,
confidence=0.5,
)
assert "Testosterone" in prompt
assert "Flibanserin" in prompt
def test_includes_key_findings(self) -> None:
"""User prompt should include key findings."""
prompt = format_synthesis_prompt(
query="test query",
evidence_summary="...",
drug_candidates=[],
key_findings=["Improved libido in postmenopausal women", "Safe profile"],
mechanism_score=5,
clinical_score=5,
confidence=0.5,
)
assert "Improved libido in postmenopausal women" in prompt
assert "Safe profile" in prompt
def test_includes_scores(self) -> None:
"""User prompt should include assessment scores."""
prompt = format_synthesis_prompt(
query="test query",
evidence_summary="...",
drug_candidates=[],
key_findings=[],
mechanism_score=8,
clinical_score=7,
confidence=0.85,
)
assert "8/10" in prompt
assert "7/10" in prompt
assert "85%" in prompt
def test_handles_empty_candidates(self) -> None:
"""User prompt should handle empty drug candidates."""
prompt = format_synthesis_prompt(
query="test query",
evidence_summary="...",
drug_candidates=[],
key_findings=[],
mechanism_score=5,
clinical_score=5,
confidence=0.5,
)
assert "None identified" in prompt
def test_handles_empty_findings(self) -> None:
"""User prompt should handle empty key findings."""
prompt = format_synthesis_prompt(
query="test query",
evidence_summary="...",
drug_candidates=[],
key_findings=[],
mechanism_score=5,
clinical_score=5,
confidence=0.5,
)
assert "No specific findings" in prompt
def test_includes_few_shot_example(self) -> None:
"""User prompt should include few-shot example."""
prompt = format_synthesis_prompt(
query="test query",
evidence_summary="...",
drug_candidates=[],
key_findings=[],
mechanism_score=5,
clinical_score=5,
confidence=0.5,
)
assert "Alprostadil" in prompt # From the few-shot example
@pytest.mark.unit
class TestFewShotExample:
"""Tests for the few-shot example quality."""
def test_few_shot_is_mostly_narrative(self) -> None:
"""Few-shot example should be mostly prose paragraphs, not bullets."""
# Count substantial paragraphs (>100 chars of prose)
paragraphs = [p for p in FEW_SHOT_EXAMPLE.split("\n\n") if len(p) > 100]
# Count bullet points
bullets = FEW_SHOT_EXAMPLE.count("\n- ") + FEW_SHOT_EXAMPLE.count("\n1. ")
# Prose should dominate - at least as many paragraphs as bullets
assert len(paragraphs) >= bullets, "Few-shot example should be mostly narrative prose"
def test_few_shot_has_executive_summary(self) -> None:
"""Few-shot example should demonstrate executive summary."""
assert "Executive Summary" in FEW_SHOT_EXAMPLE
def test_few_shot_has_background(self) -> None:
"""Few-shot example should demonstrate background section."""
assert "Background" in FEW_SHOT_EXAMPLE
def test_few_shot_has_evidence_synthesis(self) -> None:
"""Few-shot example should demonstrate evidence synthesis."""
assert "Evidence Synthesis" in FEW_SHOT_EXAMPLE
assert "Mechanism of Action" in FEW_SHOT_EXAMPLE
def test_few_shot_has_recommendations(self) -> None:
"""Few-shot example should demonstrate recommendations."""
assert "Recommendations" in FEW_SHOT_EXAMPLE
def test_few_shot_has_limitations(self) -> None:
"""Few-shot example should demonstrate limitations."""
assert "Limitations" in FEW_SHOT_EXAMPLE
def test_few_shot_has_references(self) -> None:
"""Few-shot example should demonstrate references format."""
assert "References" in FEW_SHOT_EXAMPLE
assert "pubmed.ncbi.nlm.nih.gov" in FEW_SHOT_EXAMPLE
def test_few_shot_includes_statistics(self) -> None:
"""Few-shot example should demonstrate statistical reporting."""
assert "%" in FEW_SHOT_EXAMPLE # Percentages
assert "p<" in FEW_SHOT_EXAMPLE or "p=" in FEW_SHOT_EXAMPLE # P-values
assert "CI" in FEW_SHOT_EXAMPLE # Confidence intervals
|