"""Unit tests for judge prompt helpers: evidence selection and user-prompt formatting."""

from unittest.mock import patch

import pytest

from src.prompts.judge import format_user_prompt, select_evidence_for_judge
from src.utils.models import Citation, Evidence


def make_evidence(title: str, content: str = "content") -> Evidence:
    """Build an ``Evidence`` fixture whose citation carries the given title.

    Args:
        title: Citation title; the tests use it to identify individual items.
        content: Evidence body text.

    Returns:
        An ``Evidence`` with fixed placeholder url, date and source values.
    """
    return Evidence(
        content=content,
        citation=Citation(title=title, url="http://test.com", date="2025", source="pubmed"),
    )


@pytest.mark.unit
@pytest.mark.asyncio
async def test_evidence_selection_diverse():
    """Verify evidence selection includes early and recent items (fallback logic)."""
    # Create enough evidence to trigger selection
    evidence = [make_evidence(f"Paper {i}") for i in range(100)]

    # Mock select_diverse_evidence to raise ImportError to trigger fallback logic
    with patch("src.utils.text_utils.select_diverse_evidence", side_effect=ImportError):
        selected = await select_evidence_for_judge(evidence, "test query", max_items=30)

    assert len(selected) == 30

    # Should include some early evidence (lost-in-the-middle mitigation)
    titles = [e.citation.title for e in selected]

    # Check for start (Paper 0..9): at least one selected title is in the early set
    early_papers = {f"Paper {i}" for i in range(10)}
    has_early = not early_papers.isdisjoint(titles)

    # Check for end (Paper 90..99): at least one selected title is in the late set
    late_papers = {f"Paper {i}" for i in range(90, 100)}
    has_late = not late_papers.isdisjoint(titles)

    assert has_early, "Should include early evidence"
    assert has_late, "Should include recent evidence"


@pytest.mark.unit
def test_prompt_includes_question_at_edges():
    """Verify lost-in-the-middle mitigation in prompt formatting."""
    evidence = [make_evidence("Test Paper")]
    question = "CRITICAL RESEARCH QUESTION"

    prompt = format_user_prompt(question, evidence, iteration=5, max_iterations=10)

    # Question should appear near the START of the prompt
    lines = prompt.split("\n")

    # Check start (first few lines)
    start_content = "\n".join(lines[:10])
    assert question in start_content

    # End check removed as new prompt structure doesn't enforce it
    # but we still ensure the prompt is well-formed