Spaces:

VibecoderMcSwaggins
/

DeepBoner

Paused

App Files Files Community

DeepBoner / tests /unit /prompts /test_judge_prompt.py

VibecoderMcSwaggins

refactor(prompts): Unify prompt storage in src/prompts/ (Priority 5) (#127)

f295ef3 unverified 9 days ago

raw

history blame contribute delete

2.18 kB

	from unittest.mock import patch

	import pytest

	from src.prompts.judge import format_user_prompt, select_evidence_for_judge
	from src.utils.models import Citation, Evidence


	def make_evidence(title: str, content: str = "content") -> Evidence:
	    """Build an Evidence fixture for tests.

	    Args:
	        title: Title stored on the evidence's citation.
	        content: Body text of the evidence; defaults to ``"content"``.

	    Returns:
	        An Evidence whose citation uses fixed placeholder url/date/source values.
	    """
	    citation = Citation(title=title, url="http://test.com", date="2025", source="pubmed")
	    return Evidence(content=content, citation=citation)


	@pytest.mark.unit
	@pytest.mark.asyncio
	async def test_evidence_selection_diverse():
	    """Fallback selection should keep evidence from both ends of the input list."""
	    # 100 candidates against max_items=30, so a subset has to be chosen.
	    pool = [make_evidence(f"Paper {idx}") for idx in range(100)]

	    # Make select_diverse_evidence raise ImportError so the fallback logic runs.
	    with patch("src.utils.text_utils.select_diverse_evidence", side_effect=ImportError):
	        chosen = await select_evidence_for_judge(pool, "test query", max_items=30)

	    assert len(chosen) == 30

	    # Lost-in-the-middle mitigation: the selection must overlap both the
	    # first ten (Paper 0..9) and the last ten (Paper 90..99) inputs.
	    chosen_titles = {item.citation.title for item in chosen}
	    first_ten = {f"Paper {idx}" for idx in range(10)}
	    last_ten = {f"Paper {idx}" for idx in range(90, 100)}

	    includes_early = not first_ten.isdisjoint(chosen_titles)
	    includes_late = not last_ten.isdisjoint(chosen_titles)

	    assert includes_early, "Should include early evidence"
	    assert includes_late, "Should include recent evidence"


	@pytest.mark.unit
	def test_prompt_includes_question_at_edges():
	    """Verify the research question appears near the top of the formatted prompt.

	    Only the start of the prompt is checked; the end-of-prompt check was
	    removed because the new prompt structure doesn't enforce it.
	    """
	    question = "CRITICAL RESEARCH QUESTION"
	    papers = [make_evidence("Test Paper")]

	    rendered = format_user_prompt(question, papers, iteration=5, max_iterations=10)

	    # Look only at the first ten lines of the prompt.
	    head = "\n".join(rendered.split("\n")[:10])
	    assert question in head