|
|
from unittest.mock import patch |
|
|
|
|
|
import pytest |
|
|
|
|
|
from src.prompts.judge import format_user_prompt, select_evidence_for_judge |
|
|
from src.utils.models import Citation, Evidence |
|
|
|
|
|
|
|
|
def make_evidence(title: str, content: str = "content") -> Evidence:
    """Build an ``Evidence`` fixture for the tests in this module.

    Args:
        title: Title placed on the attached citation.
        content: Evidence body text; defaults to ``"content"``.

    Returns:
        An ``Evidence`` whose citation carries fixed placeholder metadata
        (URL ``http://test.com``, date ``2025``, source ``pubmed``), so
        fixtures differ only by the values passed in.
    """
    citation = Citation(title=title, url="http://test.com", date="2025", source="pubmed")
    return Evidence(content=content, citation=citation)
|
|
|
|
|
|
|
|
@pytest.mark.unit
@pytest.mark.asyncio
async def test_evidence_selection_diverse():
    """Check that selection keeps items from both ends of the input list.

    ``select_diverse_evidence`` is patched to raise ``ImportError`` when
    called; the intent is to exercise the fallback path of
    ``select_evidence_for_judge`` (NOTE(review): confirm the function
    actually resolves the helper through ``src.utils.text_utils``).
    """
    total, limit, edge = 100, 30, 10
    pool = [make_evidence(f"Paper {idx}") for idx in range(total)]

    # Make the diversity helper unusable for the duration of the call.
    with patch("src.utils.text_utils.select_diverse_evidence", side_effect=ImportError):
        selected = await select_evidence_for_judge(pool, "test query", max_items=limit)

    # The result must be capped at exactly the requested size.
    assert len(selected) == limit

    chosen_titles = {item.citation.title for item in selected}

    # At least one of the first ten inputs ("Paper 0".."Paper 9") survives.
    first_ten = {f"Paper {idx}" for idx in range(edge)}
    assert not first_ten.isdisjoint(chosen_titles), "Should include early evidence"

    # At least one of the last ten inputs ("Paper 90".."Paper 99") survives.
    last_ten = {f"Paper {idx}" for idx in range(total - edge, total)}
    assert not last_ten.isdisjoint(chosen_titles), "Should include recent evidence"
|
|
|
|
|
|
|
|
@pytest.mark.unit
def test_prompt_includes_question_at_edges():
    """Check that the formatted prompt surfaces the question near its start.

    The visible assertions only cover the first ten lines of the prompt.
    NOTE(review): the test name mentions "edges"; confirm whether a
    matching end-of-prompt check exists or should be added.
    """
    question = "CRITICAL RESEARCH QUESTION"
    evidence = [make_evidence(title="Test Paper")]

    prompt = format_user_prompt(
        question,
        evidence,
        iteration=5,
        max_iterations=10,
    )

    lines = prompt.split("\n")

    # The question must show up within the opening ten lines of the prompt.
    head = "\n".join(lines[:10])
    assert question in head
|
|
|
|
|
|
|
|
|
|
|
|