|
|
"""Tests for narrative synthesis prompts.""" |
|
|
|
|
|
import pytest |
|
|
|
|
|
from src.prompts.synthesis import ( |
|
|
FEW_SHOT_EXAMPLE, |
|
|
format_synthesis_prompt, |
|
|
get_synthesis_system_prompt, |
|
|
) |
|
|
|
|
|
|
|
|
@pytest.mark.unit
class TestSynthesisSystemPrompt:
    """Substring checks on the text returned by ``get_synthesis_system_prompt``."""

    def test_system_prompt_emphasizes_prose(self) -> None:
        """The prompt asks for prose paragraphs rather than bullet points."""
        system_prompt = get_synthesis_system_prompt()
        # The first marker is matched case-sensitively; the second is matched
        # against the lower-cased prompt.
        assert "PROSE PARAGRAPHS" in system_prompt
        assert "not bullet points" in system_prompt.lower()

    def test_system_prompt_requires_executive_summary(self) -> None:
        """The prompt names the executive summary and marks something REQUIRED."""
        system_prompt = get_synthesis_system_prompt()
        for marker in ("Executive Summary", "REQUIRED"):
            assert marker in system_prompt

    def test_system_prompt_requires_background(self) -> None:
        """The prompt mentions a background section."""
        system_prompt = get_synthesis_system_prompt()
        assert "Background" in system_prompt

    def test_system_prompt_requires_evidence_synthesis(self) -> None:
        """The prompt mentions evidence synthesis and mechanism of action."""
        system_prompt = get_synthesis_system_prompt()
        for heading in ("Evidence Synthesis", "Mechanism of Action"):
            assert heading in system_prompt

    def test_system_prompt_requires_recommendations(self) -> None:
        """The prompt mentions a recommendations section."""
        system_prompt = get_synthesis_system_prompt()
        assert "Recommendations" in system_prompt

    def test_system_prompt_requires_limitations(self) -> None:
        """The prompt mentions a limitations section."""
        system_prompt = get_synthesis_system_prompt()
        assert "Limitations" in system_prompt

    def test_system_prompt_warns_about_hallucination(self) -> None:
        """The prompt tells the model never to hallucinate."""
        system_prompt = get_synthesis_system_prompt()
        # A case-insensitive match also accepts the "NEVER hallucinate" spelling.
        assert "never hallucinate" in system_prompt.lower()

    def test_system_prompt_includes_domain_name(self) -> None:
        """A domain passed as ``sexual_health`` shows up as "sexual health"."""
        system_prompt = get_synthesis_system_prompt("sexual_health")
        # Compared case-insensitively, with a space instead of the underscore.
        assert "sexual health" in system_prompt.lower()
|
|
|
|
|
|
|
|
@pytest.mark.unit
class TestFormatSynthesisPrompt:
    """Tests for synthesis user prompt formatting.

    Every test renders a prompt through ``_build_prompt`` and checks that a
    given piece of text appears in the result.
    """

    @staticmethod
    def _build_prompt(
        *,
        query: str = "test query",
        evidence_summary: str = "...",
        drug_candidates: "list[str] | None" = None,
        key_findings: "list[str] | None" = None,
        mechanism_score: int = 5,
        clinical_score: int = 5,
        confidence: float = 0.5,
    ) -> str:
        """Call ``format_synthesis_prompt`` with neutral defaults.

        Each test overrides only the arguments it cares about, so the value
        under test is not buried in boilerplate. The leading underscore keeps
        pytest from collecting this helper as a test.

        Args:
            query: Research query forwarded to the formatter.
            evidence_summary: Evidence text forwarded to the formatter.
            drug_candidates: Candidate names; ``None`` means an empty list.
            key_findings: Finding strings; ``None`` means an empty list.
            mechanism_score: Mechanism score forwarded to the formatter.
            clinical_score: Clinical score forwarded to the formatter.
            confidence: Confidence value forwarded to the formatter.

        Returns:
            Whatever ``format_synthesis_prompt`` returns for these arguments.
        """
        return format_synthesis_prompt(
            query=query,
            evidence_summary=evidence_summary,
            # Fresh lists per call: no mutable default is shared between tests.
            drug_candidates=[] if drug_candidates is None else drug_candidates,
            key_findings=[] if key_findings is None else key_findings,
            mechanism_score=mechanism_score,
            clinical_score=clinical_score,
            confidence=confidence,
        )

    def test_includes_query(self) -> None:
        """User prompt should include the research query."""
        prompt = self._build_prompt(
            query="testosterone libido",
            evidence_summary="Study shows efficacy...",
            drug_candidates=["Testosterone"],
            key_findings=["Improved libido"],
            mechanism_score=8,
            clinical_score=7,
            confidence=0.85,
        )
        assert "testosterone libido" in prompt

    def test_includes_evidence_summary(self) -> None:
        """User prompt should include evidence summary."""
        prompt = self._build_prompt(
            evidence_summary="Study by Smith et al. shows significant results...",
        )
        assert "Study by Smith et al." in prompt

    def test_includes_drug_candidates(self) -> None:
        """User prompt should include every drug candidate."""
        prompt = self._build_prompt(drug_candidates=["Testosterone", "Flibanserin"])
        assert "Testosterone" in prompt
        assert "Flibanserin" in prompt

    def test_includes_key_findings(self) -> None:
        """User prompt should include every key finding."""
        prompt = self._build_prompt(
            key_findings=["Improved libido in postmenopausal women", "Safe profile"],
        )
        assert "Improved libido in postmenopausal women" in prompt
        assert "Safe profile" in prompt

    def test_includes_scores(self) -> None:
        """User prompt should include assessment scores."""
        prompt = self._build_prompt(
            mechanism_score=8,
            clinical_score=7,
            confidence=0.85,
        )
        # Scores are expected as "N/10" and the confidence as a percentage.
        assert "8/10" in prompt
        assert "7/10" in prompt
        assert "85%" in prompt

    def test_handles_empty_candidates(self) -> None:
        """User prompt should handle empty drug candidates."""
        prompt = self._build_prompt()  # defaults give an empty candidate list
        assert "None identified" in prompt

    def test_handles_empty_findings(self) -> None:
        """User prompt should handle empty key findings."""
        prompt = self._build_prompt()  # defaults give an empty findings list
        assert "No specific findings" in prompt

    def test_includes_few_shot_example(self) -> None:
        """User prompt should include few-shot example."""
        prompt = self._build_prompt()
        # NOTE(review): "Alprostadil" is presumably text from FEW_SHOT_EXAMPLE;
        # confirm against src.prompts.synthesis.
        assert "Alprostadil" in prompt
|
|
|
|
|
|
|
|
@pytest.mark.unit
class TestFewShotExample:
    """Content checks on the ``FEW_SHOT_EXAMPLE`` constant."""

    def test_few_shot_is_mostly_narrative(self) -> None:
        """Long text blocks should be at least as numerous as list items."""
        # A "paragraph" is any blank-line-separated chunk over 100 characters.
        long_blocks = [
            chunk for chunk in FEW_SHOT_EXAMPLE.split("\n\n") if len(chunk) > 100
        ]

        # A list item is a line starting with "- " or "1. ". Numbered items
        # other than "1." are not counted, and neither is an item on the very
        # first line, because there is no newline before it.
        list_items = sum(
            FEW_SHOT_EXAMPLE.count(marker) for marker in ("\n- ", "\n1. ")
        )

        assert list_items <= len(long_blocks), "Few-shot example should be mostly narrative prose"

    def test_few_shot_has_executive_summary(self) -> None:
        """The example contains an "Executive Summary" heading."""
        assert "Executive Summary" in FEW_SHOT_EXAMPLE

    def test_few_shot_has_background(self) -> None:
        """The example contains a "Background" heading."""
        assert "Background" in FEW_SHOT_EXAMPLE

    def test_few_shot_has_evidence_synthesis(self) -> None:
        """The example covers evidence synthesis and mechanism of action."""
        for heading in ("Evidence Synthesis", "Mechanism of Action"):
            assert heading in FEW_SHOT_EXAMPLE

    def test_few_shot_has_recommendations(self) -> None:
        """The example contains a "Recommendations" heading."""
        assert "Recommendations" in FEW_SHOT_EXAMPLE

    def test_few_shot_has_limitations(self) -> None:
        """The example contains a "Limitations" heading."""
        assert "Limitations" in FEW_SHOT_EXAMPLE

    def test_few_shot_has_references(self) -> None:
        """The example has a references section that includes a PubMed link."""
        for marker in ("References", "pubmed.ncbi.nlm.nih.gov"):
            assert marker in FEW_SHOT_EXAMPLE

    def test_few_shot_includes_statistics(self) -> None:
        """The example shows a percentage, a p-value and a confidence interval."""
        assert "%" in FEW_SHOT_EXAMPLE
        # Either "p<" or "p=" counts as a reported p-value.
        assert any(token in FEW_SHOT_EXAMPLE for token in ("p<", "p="))
        # Plain substring match, so any occurrence of "CI" satisfies this.
        assert "CI" in FEW_SHOT_EXAMPLE
|
|
|