"""Judge prompts for evidence assessment."""
from src.utils.models import Evidence
SYSTEM_PROMPT = """You are an expert drug repurposing research judge.
Your task is to SCORE evidence from biomedical literature. You do NOT decide whether to
continue searching or synthesize - that decision is made by the orchestration system
based on your scores.
## Your Role: Scoring Only
You provide objective scores. The system decides next steps based on explicit thresholds.
This separation prevents bias in the decision-making process.
## Scoring Criteria
1. **Mechanism Score (0-10)**: How well does the evidence explain the biological mechanism?
- 0-3: No clear mechanism, speculative
- 4-6: Some mechanistic insight, but gaps exist
- 7-10: Clear, well-supported mechanism of action
2. **Clinical Evidence Score (0-10)**: How strong is the clinical/preclinical support?
- 0-3: No clinical data, only theoretical
- 4-6: Preclinical or early clinical data
- 7-10: Strong clinical evidence (trials, meta-analyses)
3. **Drug Candidates**: List SPECIFIC drug names mentioned in the evidence
- Only include drugs explicitly mentioned
- Do NOT hallucinate or infer drug names
- Include drug class if specific names aren't available (e.g., "SSRI antidepressants")
4. **Key Findings**: Extract 3-5 key findings from the evidence
- Focus on findings relevant to the research question
- Include mechanism insights and clinical outcomes
5. **Confidence (0.0-1.0)**: Your confidence in the scores
- Based on evidence quality and relevance
- Lower if evidence is tangential or low-quality
## Output Format
Return valid JSON with these fields:
- details.mechanism_score (int 0-10)
- details.mechanism_reasoning (string)
- details.clinical_evidence_score (int 0-10)
- details.clinical_reasoning (string)
- details.drug_candidates (list of strings)
- details.key_findings (list of strings)
- sufficient (boolean) - TRUE if scores suggest enough evidence
- confidence (float 0-1)
- recommendation ("continue" or "synthesize") - Your suggestion (system may override)
- next_search_queries (list) - If continuing, suggest FOCUSED queries
- reasoning (string)
## CRITICAL: Search Query Rules
When suggesting next_search_queries:
- STAY FOCUSED on the original research question
- Do NOT drift to tangential topics
- If question is about "female libido", do NOT suggest "bone health" or "muscle mass"
- Refine existing terms, don't explore random medical associations
"""
MAX_EVIDENCE_FOR_JUDGE = 30 # Keep under token limits
async def select_evidence_for_judge(
evidence: list[Evidence],
query: str,
max_items: int = MAX_EVIDENCE_FOR_JUDGE,
) -> list[Evidence]:
"""
Select diverse, relevant evidence for judge evaluation.
Implements RAG best practices:
- Diversity selection over recency-only
- Lost-in-the-middle mitigation
- Relevance re-ranking
"""
if len(evidence) <= max_items:
return evidence
try:
from src.utils.text_utils import select_diverse_evidence
# Use embedding-based diversity selection
return await select_diverse_evidence(evidence, n=max_items, query=query)
except ImportError:
# Fallback: mix of recent + early (lost-in-the-middle mitigation)
early = evidence[: max_items // 3] # First third
recent = evidence[-(max_items * 2 // 3) :] # Last two-thirds
return early + recent
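

# For reference, a maximal-marginal-relevance (MMR) style pass is one way the
# imported select_diverse_evidence could work. The sketch below is a
# simplified, hypothetical stand-in (token-overlap similarity instead of
# embeddings), not the actual implementation in src.utils.text_utils.
def _diversity_select_sketch(
    evidence: list[Evidence],
    n: int,
    query: str,
    relevance_weight: float = 0.7,
) -> list[Evidence]:
    def _tokens(text: str) -> set[str]:
        return set(text.lower().split())

    def _jaccard(a: set[str], b: set[str]) -> float:
        union = a | b
        return len(a & b) / len(union) if union else 0.0

    query_tokens = _tokens(query)
    selected: list[Evidence] = []
    remaining = list(evidence)
    while remaining and len(selected) < n:

        def _mmr(e: Evidence) -> float:
            # Reward query relevance, penalize similarity to already-selected items.
            relevance = _jaccard(_tokens(e.content), query_tokens)
            redundancy = max(
                (_jaccard(_tokens(e.content), _tokens(s.content)) for s in selected),
                default=0.0,
            )
            return relevance_weight * relevance - (1 - relevance_weight) * redundancy

        best = max(remaining, key=_mmr)
        selected.append(best)
        remaining.remove(best)
    return selected
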
def format_user_prompt(
question: str,
evidence: list[Evidence],
iteration: int = 0,
max_iterations: int = 10,
total_evidence_count: int | None = None,
) -> str:
"""
Format user prompt with selected evidence and iteration context.
NOTE: Evidence should be pre-selected using select_evidence_for_judge().
This function assumes evidence is already capped.
"""
    # Explicit None check: "or" would wrongly fall back when the caller passes 0.
    total_count = (
        total_evidence_count if total_evidence_count is not None else len(evidence)
    )
max_content_len = 1500
def format_single_evidence(i: int, e: Evidence) -> str:
content = e.content
if len(content) > max_content_len:
content = content[:max_content_len] + "..."
return (
f"### Evidence {i + 1}\n"
f"**Source**: {e.citation.source.upper()} - {e.citation.title}\n"
f"**URL**: {e.citation.url}\n"
f"**Content**:\n{content}"
)
evidence_text = "\n\n".join([format_single_evidence(i, e) for i, e in enumerate(evidence)])
# Lost-in-the-middle mitigation: put critical context at START and END
return f"""## Research Question (IMPORTANT - stay focused on this)
{question}
## Search Progress
- **Iteration**: {iteration}/{max_iterations}
- **Total evidence collected**: {total_count} sources
- **Evidence shown below**: {len(evidence)} diverse sources (selected for relevance)
## Available Evidence
{evidence_text}
## Your Task
Score this evidence for drug repurposing potential. Provide ONLY scores and extracted data.
Your recommendation field is a SUGGESTION - the final "synthesize" vs "continue"
decision is made by the system.
## REMINDER: Original Question (stay focused)
{question}
"""
def format_empty_evidence_prompt(question: str) -> str:
"""
Format prompt when no evidence was found.
Args:
question: The user's research question
Returns:
Formatted prompt string
"""
return f"""## Research Question
{question}
## Available Evidence
No evidence was found from the search.
## Your Task
Since no evidence was found, recommend search queries that might yield better results.
Set sufficient=False and recommendation="continue".
Suggest 3-5 specific search queries.
"""